Merge from trunk

@ -40,6 +40,7 @@ recipes/.gitignore
recipes/README.md
recipes/icon_checker.py
recipes/readme_updater.py
recipes/garfield.recipe
recipes/katalog_egazeciarz.recipe
recipes/tv_axnscifi.recipe
recipes/tv_comedycentral.recipe
@ -63,6 +64,7 @@ recipes/tv_tvppolonia.recipe
recipes/tv_tvpuls.recipe
recipes/tv_viasathistory.recipe
recipes/icons/katalog_egazeciarz.png
recipes/icons/garfield.png
recipes/icons/tv_axnscifi.png
recipes/icons/tv_comedycentral.png
recipes/icons/tv_discoveryscience.png

COPYRIGHT
@ -79,13 +79,6 @@ License: GPL2+
The full text of the GPL is distributed as in
/usr/share/common-licenses/GPL-2 on Debian systems.

Files: src/pyPdf/*
Copyright: Copyright (c) 2006, Mathieu Fenniak
Copyright: Copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com>
License: BSD
The full text of the BSD license is distributed as in
/usr/share/common-licenses/BSD on Debian systems.

Files: src/calibre/utils/lzx/*
Copyright: Copyright (C) 2002, Matthew T. Russotto
Copyright: Copyright (C) 2008, Marshall T. Vandegrift <llasram@gmail.com>
@ -100,49 +93,6 @@ License: BSD
The full text of the BSD license is distributed as in
/usr/share/common-licenses/BSD on Debian systems.

Files: src/calibre/utils/pyparsing.py
Copyright: Copyright (c) 2003-2008, Paul T. McGuire
License: MIT
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

Files: src/calibre/utils/PythonMagickWand.py
Copyright: (c) 2007 - Achim Domma - domma@procoders.net
License: MIT
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

Files: src/calibre/utils/msdes/d3des.h:
Files: src/calibre/utils/msdes/des.c:
Copyright: Copyright (C) 1988,1989,1990,1991,1992, Richard Outerbridge

Changelog.yaml
@ -1,4 +1,4 @@
# vim:fileencoding=UTF-8:ts=2:sw=2:sta:et:sts=2:ai
# vim:fileencoding=utf-8:ts=2:sw=2:sta:et:sts=2:ai
# Each release can have new features and bug fixes. Each of which
# must have a title and can optionally have linked tickets and a description.
# In addition they can have a type field which defaults to minor, but should be major
@ -20,6 +20,118 @@
# new recipes:
# - title:

- version: 0.9.27
date: 2013-04-12

new features:
- title: "Metadata download: Add two new sources for covers: Google Image Search and bigbooksearch.com."
description: "To enable them go to Preferences->Metadata download and enable the 'Google Image' and 'Big Book Search' sources. Google Images is useful for finding larger covers as well as alternate versions of the cover. Big Book Search searches for alternate covers from amazon.com. It can occasionally find nicer covers than the direct Amazon source. Note that both these sources download multiple covers for a single book. Some of these covers can be wrong (i.e. they may be of a different book or not covers at all, so you should inspect the results and manually pick the best match). When bulk downloading, these sources are only used if the other sources find no covers."
type: major

- title: "Content server: Allow specifying a restriction to use for the server when embedding it as a WSGI app."
tickets: [1167951]

- title: "Get Books: Add a plugin for the Koobe Polish book store"

- title: "calibredb add_format: Add an option to not replace existing formats. Also pep8 compliance."

- title: "Allow restoring of the ORIGINAL_XXX format by right-clicking it in the book details panel"

bug fixes:
- title: "AZW3 Input: Do not fail to identify JPEG images with 8BIM headers created with Adobe Photoshop."
tickets: [1167985]

- title: "Amazon metadata download: Ignore Spanish edition entries when searching for a book on amazon.com"

- title: "TXT Input: When converting a txt file with a Byte Order Mark, remove the Byte Order Mark before further processing as it can cause the first line of the text to be mis-interpreted."

- title: "Get Books: Fix searching for current book/title/author by right clicking the get books icon"

- title: "Get Books: Update nexto, gutenberg, and virtualo store plugins for website changes"

- title: "Amazon metadata download: When downloading from amazon.co.jp handle the 'Black curtain redirect' for adult titles."
tickets: [1165628]

- title: "When extracting zip files do not allow maliciously created zip files to overwrite other files on the system"

- title: "RTF Input: Handle RTF files with invalid border style specifications"
tickets: [1021270]

improved recipes:
- The Escapist
- San Francisco Chronicle
- The Onion
- Fronda
- Tom's Hardware
- New Yorker
- Financial Times UK
- Business Week Magazine
- Victoria Times
- tvxs
- The Independent

new recipes:
- title: Economia
author: Manish Bhattarai

- title: Universe Today
author: seird

- title: The Galaxy's Edge
author: Krittika Goyal

- version: 0.9.26
date: 2013-04-05

new features:
- title: "PDF Output: Allow using templates to create arbitrary headers and footers. Look under PDF Output in the conversion dialog for this feature."

- title: "ToC Editor: Allow generating the ToC directly from individual files inside the ebook. Useful for EPUBs that have individual chapters in single files."
tickets: [1163520]

- title: "ToC Editor: Add buttons to indent/unindent the current entry"

- title: "ToC Editor: Right-click menu to perform various useful actions on entries in the ToC"

- title: "Column icons: Allow use of wide images as column icons"

- title: "Add USB ids for the Palm Pre2 and Samsung Galaxy phone to the device drivers"
tickets: [1162293,1163115]

bug fixes:
- title: "PDF Output: Fix generating page numbers causing links to not work."
tickets: [1162573]

- title: "Wrong filename output in error message when 'Guide reference not found'"
tickets: [1163659]

- title: "Get Books: Update Amazon, Barnes & Noble, Waterstones and Gutenberg store plugins for website change"

- title: "PDF Output: Fix 1 pixel wide left and top margins on the cover page for some PDF conversions due to incorrect rounding."
tickets: [1162054]

- title: "ToC Editor: Fix drag and drop of multiple items resulting in the dropped items being in random order sometimes."
tickets: [1161999]

improved recipes:
- Financial Times UK
- Sing Tao Daily
- Apple Daily
- A List Apart
- Business Week
- Harpers printed edition
- Harvard Business Review

new recipes:
- title: AM730
author: Eddie Lau

- title: Arret sur images
author: Francois D

- title: Diario de Noticias
author: Jose Pinto

- version: 0.9.25
date: 2013-03-29

@ -436,8 +436,8 @@ generate a Table of Contents in the converted ebook, based on the actual content

.. note:: Using these options can be a little challenging to get exactly right.
If you prefer creating/editing the Table of Contents by hand, convert to
the EPUB or AZW3 formats and select the checkbox at the bottom of the
screen that says
the EPUB or AZW3 formats and select the checkbox at the bottom of the Table
of Contents section of the conversion dialog that says
:guilabel:`Manually fine-tune the Table of Contents after conversion`.
This will launch the ToC Editor tool after the conversion. It allows you to
create entries in the Table of Contents by simply clicking the place in the

@ -647,12 +647,17 @@ computers. Run |app| on a single computer and access it via the Content Server
or a Remote Desktop solution.

If you must share the actual library, use a file syncing tool like
DropBox or rsync or Microsoft SkyDrive instead of a networked drive. Even with
these tools there is danger of data corruption/loss, so only do this if you are
willing to live with that risk. In particular, be aware that **Google Drive**
is incompatible with |app|, if you put your |app| library in Google Drive, you
*will* suffer data loss. See
`this thread <http://www.mobileread.com/forums/showthread.php?t=205581>`_ for details.
DropBox or rsync or Microsoft SkyDrive instead of a networked drive. If you are
using a file-syncing tool it is **essential** that you make sure that both
|app| and the file syncing tool do not try to access the |app| library at the
same time. In other words, **do not** run the file syncing tool and |app| at
the same time.

Even with these tools there is danger of data corruption/loss, so only do this
if you are willing to live with that risk. In particular, be aware that
**Google Drive** is incompatible with |app|, if you put your |app| library in
Google Drive, **you will suffer data loss**. See `this thread
<http://www.mobileread.com/forums/showthread.php?t=205581>`_ for details.

Content From The Web
---------------------
@ -797,6 +802,12 @@ Downloading from the Internet can sometimes result in a corrupted download. If t
* Try temporarily disabling your antivirus program (Microsoft Security Essentials, or Kaspersky or Norton or McAfee or whatever). This is most likely the culprit if the upgrade process is hanging in the middle.
* Try rebooting your computer and running a registry cleaner like `Wise registry cleaner <http://www.wisecleaner.com>`_.
* Try downloading the installer with an alternate browser. For example if you are using Internet Explorer, try using Firefox or Chrome instead.
* If you get an error about a missing DLL on windows, then most likely, the
permissions on your temporary folder are incorrect. Go to the folder
:file:`C:\\Users\\USERNAME\\AppData\\Local` in Windows explorer and then
right click on the :file:`Temp` folder and select :guilabel:`Properties` and go to
the :guilabel:`Security` tab. Make sure that your user account has full control
for this folder.

If you still cannot get the installer to work and you are on windows, you can use the `calibre portable install <http://calibre-ebook.com/download_portable>`_, which does not need an installer (it is just a zip file).

@ -367,6 +367,8 @@ For example::

date:>10daysago
date:<=45daysago

To avoid potential problems with translated strings when using a non-English version of calibre, the strings ``_today``, ``_yesterday``, ``_thismonth``, and ``_daysago`` are always available. They are not translated.

You can search for books that have a format of a certain size like this::

@ -91,7 +91,11 @@ First, we have to create a WSGI *adapter* for the calibre content server. Here i
# Path to the calibre library to be served
# The server process must have write permission for all files/dirs
# in this directory or BAD things will happen
path_to_library='/home/kovid/documents/demo library'
path_to_library='/home/kovid/documents/demo library',

# The virtual library (restriction) to be used when serving this
# library.
virtual_library=None
)

del create_wsgi_app
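
For reference, the hunk above amounts to adding the ``virtual_library`` keyword to the adapter call in that chapter of the manual. A minimal sketch of how the complete adapter then reads, assuming the ``create_wsgi_app`` entry point exported by ``calibre.library.server.main`` in this release (the import path and filesystem paths are illustrative, not part of the diff)::

    # WSGI adapter for the calibre content server (sketch)
    import sys

    # Make the calibre sources importable; adjust to your install location.
    sys.path.insert(0, '/path/to/calibre/src')

    from calibre.library.server.main import create_wsgi_app

    application = create_wsgi_app(
        # Path to the calibre library to be served.
        # The server process must have write permission for all files/dirs
        # in this directory or BAD things will happen.
        path_to_library='/home/kovid/documents/demo library',

        # The virtual library (restriction) to be used when serving this
        # library; None serves the whole library.
        virtual_library=None,
    )

    # The WSGI container only needs the module-level name ``application``.
    del create_wsgi_app
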
@ -9,14 +9,14 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
__author__ = 'Dave Asbury'
|
||||
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 12
|
||||
max_articles_per_feed = 20
|
||||
linearize_tables = True
|
||||
remove_empty_feeds = True
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
auto_cleanup = True
|
||||
language = 'en_GB'
|
||||
|
||||
compress_news_images = True
|
||||
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/161987_9010212100_2035706408_n.jpg'
|
||||
|
||||
masthead_url = 'http://www.trinitymirror.com/images/birminghampost-logo.gif'
|
||||
|
@ -1,3 +1,4 @@
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from collections import OrderedDict
|
||||
|
||||
@ -39,7 +40,7 @@ class BusinessWeekMagazine(BasicNewsRecipe):
|
||||
title=self.tag_to_string(div.a).strip()
|
||||
url=div.a['href']
|
||||
soup0 = self.index_to_soup(url)
|
||||
urlprint=soup0.find('li', attrs={'class':'print tracked'}).a['href']
|
||||
urlprint=soup0.find('a', attrs={'href':re.compile('.*printer.*')})['href']
|
||||
articles.append({'title':title, 'url':urlprint, 'description':'', 'date':''})
|
||||
|
||||
|
||||
@ -56,7 +57,7 @@ class BusinessWeekMagazine(BasicNewsRecipe):
|
||||
title=self.tag_to_string(div.a).strip()
|
||||
url=div.a['href']
|
||||
soup0 = self.index_to_soup(url)
|
||||
urlprint=soup0.find('li', attrs={'class':'print tracked'}).a['href']
|
||||
urlprint=soup0.find('a', attrs={'href':re.compile('.*printer.*')})['href']
|
||||
articles.append({'title':title, 'url':urlprint, 'description':desc, 'date':''})
|
||||
|
||||
if articles:
|
||||
|
@ -7,13 +7,14 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
||||
#cover_url = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg'
|
||||
__author__ = 'Dave Asbury'
|
||||
description = 'The official website of Countryfile Magazine'
|
||||
# last updated 8/12/12
|
||||
# last updated 19/10/12
|
||||
language = 'en_GB'
|
||||
oldest_article = 30
|
||||
max_articles_per_feed = 25
|
||||
remove_empty_feeds = True
|
||||
no_stylesheets = True
|
||||
auto_cleanup = True
|
||||
compress_news_images = True
|
||||
ignore_duplicate_articles = {'title', 'url'}
|
||||
#articles_are_obfuscated = True
|
||||
#article_already_exists = False
|
||||
|
@ -13,9 +13,9 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
|
||||
|
||||
masthead_url = 'http://www.nmauk.co.uk/nma/images/daily_mirror.gif'
|
||||
|
||||
|
||||
compress_news_images = True
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 1
|
||||
max_articles_per_feed = 12
|
||||
remove_empty_feeds = True
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
|
23
recipes/diario_de_noticias.recipe
Normal file
@ -0,0 +1,23 @@
|
||||
# vim:fileencoding=UTF-8
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1365070687(BasicNewsRecipe):
|
||||
title ='Diário de Notícias'
|
||||
oldest_article = 7
|
||||
language = 'pt'
|
||||
__author__ = 'Jose Pinto'
|
||||
max_articles_per_feed = 100
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'cln-esqmid'}) ]
|
||||
remove_tags = [ dict(name='table', attrs={'class':'TabFerramentasInf'}) ]
|
||||
|
||||
feeds = [(u'Portugal', u'http://feeds.dn.pt/DN-Portugal'),
|
||||
(u'Globo', u'http://feeds.dn.pt/DN-Globo'),
|
||||
(u'Economia', u'http://feeds.dn.pt/DN-Economia'),
|
||||
(u'Ci\xeancia', u'http://feeds.dn.pt/DN-Ciencia'),
|
||||
(u'Artes', u'http://feeds.dn.pt/DN-Artes'),
|
||||
(u'TV & Media', u'http://feeds.dn.pt/DN-Media'),
|
||||
(u'Opini\xe3o', u'http://feeds.dn.pt/DN-Opiniao'),
|
||||
(u'Pessoas', u'http://feeds.dn.pt/DN-Pessoas')
|
||||
]
|
@ -11,22 +11,22 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class EcoGeek(BasicNewsRecipe):
|
||||
title = 'EcoGeek'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'EcoGeek - Technology for the Environment Blog Feed'
|
||||
description = 'EcoGeek - Technology for the Environment Blog Feed'
|
||||
publisher = 'EcoGeek'
|
||||
language = 'en'
|
||||
|
||||
category = 'news, ecology, blog'
|
||||
oldest_article = 7
|
||||
oldest_article = 30
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = True
|
||||
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
feeds = [(u'Posts', u'http://feeds2.feedburner.com/EcoGeek')]
|
||||
|
||||
|
17
recipes/economia.recipe
Normal file
@ -0,0 +1,17 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1314326622(BasicNewsRecipe):
|
||||
title = u'Economia'
|
||||
__author__ = 'Manish Bhattarai'
|
||||
description = 'Economia - Intelligence & Insight for ICAEW Members'
|
||||
language = 'en_GB'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 25
|
||||
masthead_url = 'http://economia.icaew.com/~/media/Images/Design%20Images/Economia_Red_website.ashx'
|
||||
cover_url = 'http://economia.icaew.com/~/media/Images/Design%20Images/Economia_Red_website.ashx'
|
||||
no_stylesheets = True
|
||||
remove_empty_feeds = True
|
||||
remove_tags_before = dict(id='content')
|
||||
remove_tags_after = dict(id='stars-wrapper')
|
||||
remove_tags = [dict(attrs={'class':['floatR', 'sharethis', 'rating clearfix']})]
|
||||
feeds = [(u'News', u'http://feedity.com/icaew-com/VlNTVFRa.rss'),(u'Business', u'http://feedity.com/icaew-com/VlNTVFtS.rss'),(u'People', u'http://feedity.com/icaew-com/VlNTVFtX.rss'),(u'Opinion', u'http://feedity.com/icaew-com/VlNTVFtW.rss'),(u'Finance', u'http://feedity.com/icaew-com/VlNTVFtV.rss')]
|
@ -12,12 +12,6 @@ class EsensjaRSS(BasicNewsRecipe):
|
||||
language = 'pl'
|
||||
encoding = 'utf-8'
|
||||
INDEX = 'http://www.esensja.pl'
|
||||
extra_css = '''.t-title {font-size: x-large; font-weight: bold; text-align: left}
|
||||
.t-author {font-size: x-small; text-align: left}
|
||||
.t-title2 {font-size: x-small; font-style: italic; text-align: left}
|
||||
.text {font-size: small; text-align: left}
|
||||
.annot-ref {font-style: italic; text-align: left}
|
||||
'''
|
||||
cover_url = ''
|
||||
masthead_url = 'http://esensja.pl/img/wrss.gif'
|
||||
use_embedded_content = False
|
||||
|
@ -1,13 +1,14 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010-2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2010-2013, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.ft.com/uk-edition
|
||||
www.ft.com/intl/uk-edition
|
||||
'''
|
||||
|
||||
import datetime
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from collections import OrderedDict
|
||||
|
||||
class FinancialTimes(BasicNewsRecipe):
|
||||
title = 'Financial Times (UK)'
|
||||
@ -28,7 +29,7 @@ class FinancialTimes(BasicNewsRecipe):
|
||||
masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg'
|
||||
LOGIN = 'https://registration.ft.com/registration/barrier/login'
|
||||
LOGIN2 = 'http://media.ft.com/h/subs3.html'
|
||||
INDEX = 'http://www.ft.com/uk-edition'
|
||||
INDEX = 'http://www.ft.com/intl/uk-edition'
|
||||
PREFIX = 'http://www.ft.com'
|
||||
|
||||
conversion_options = {
|
||||
@ -93,7 +94,7 @@ class FinancialTimes(BasicNewsRecipe):
|
||||
try:
|
||||
urlverified = self.browser.open_novisit(url).geturl() # resolve redirect.
|
||||
except:
|
||||
continue
|
||||
continue
|
||||
title = self.tag_to_string(item)
|
||||
date = strftime(self.timefmt)
|
||||
articles.append({
|
||||
@ -105,29 +106,30 @@ class FinancialTimes(BasicNewsRecipe):
|
||||
return articles
|
||||
|
||||
def parse_index(self):
|
||||
feeds = []
|
||||
feeds = OrderedDict()
|
||||
soup = self.index_to_soup(self.INDEX)
|
||||
dates= self.tag_to_string(soup.find('div', attrs={'class':'btm-links'}).find('div'))
|
||||
self.timefmt = ' [%s]'%dates
|
||||
wide = soup.find('div',attrs={'class':'wide'})
|
||||
if not wide:
|
||||
return feeds
|
||||
allsections = wide.findAll(attrs={'class':lambda x: x and 'footwell' in x.split()})
|
||||
if not allsections:
|
||||
return feeds
|
||||
count = 0
|
||||
for item in allsections:
|
||||
count = count + 1
|
||||
if self.test and count > 2:
|
||||
return feeds
|
||||
fitem = item.h3
|
||||
if not fitem:
|
||||
fitem = item.h4
|
||||
ftitle = self.tag_to_string(fitem)
|
||||
self.report_progress(0, _('Fetching feed')+' %s...'%(ftitle))
|
||||
feedarts = self.get_artlinks(item.ul)
|
||||
feeds.append((ftitle,feedarts))
|
||||
return feeds
|
||||
#dates= self.tag_to_string(soup.find('div', attrs={'class':'btm-links'}).find('div'))
|
||||
#self.timefmt = ' [%s]'%dates
|
||||
section_title = 'Untitled'
|
||||
|
||||
for column in soup.findAll('div', attrs = {'class':'feedBoxes clearfix'}):
|
||||
for section in column. findAll('div', attrs = {'class':'feedBox'}):
|
||||
sectiontitle=self.tag_to_string(section.find('h4'))
|
||||
if '...' not in sectiontitle: section_title=sectiontitle
|
||||
for article in section.ul.findAll('li'):
|
||||
articles = []
|
||||
title=self.tag_to_string(article.a)
|
||||
url=article.a['href']
|
||||
articles.append({'title':title, 'url':url, 'description':'', 'date':''})
|
||||
|
||||
if articles:
|
||||
if section_title not in feeds:
|
||||
feeds[section_title] = []
|
||||
feeds[section_title] += articles
|
||||
|
||||
|
||||
ans = [(key, val) for key, val in feeds.iteritems()]
|
||||
return ans
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
items = ['promo-box','promo-title',
|
||||
@ -174,9 +176,6 @@ class FinancialTimes(BasicNewsRecipe):
|
||||
count += 1
|
||||
tfile = PersistentTemporaryFile('_fa.html')
|
||||
tfile.write(html)
|
||||
tfile.close()
|
||||
tfile.close()
|
||||
self.temp_files.append(tfile)
|
||||
return tfile.name
|
||||
|
||||
def cleanup(self):
|
||||
self.browser.open('https://registration.ft.com/registration/login/logout?location=')
|
@ -1,20 +1,21 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2010-2013, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
http://www.ft.com/intl/us-edition
|
||||
www.ft.com/intl/international-edition
|
||||
'''
|
||||
|
||||
import datetime
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from collections import OrderedDict
|
||||
|
||||
class FinancialTimes(BasicNewsRecipe):
|
||||
title = 'Financial Times (US) printed edition'
|
||||
title = 'Financial Times (International) printed edition'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = "The Financial Times (FT) is one of the world's leading business news and information organisations, recognised internationally for its authority, integrity and accuracy."
|
||||
publisher = 'The Financial Times Ltd.'
|
||||
category = 'news, finances, politics, UK, World'
|
||||
category = 'news, finances, politics, World'
|
||||
oldest_article = 2
|
||||
language = 'en'
|
||||
max_articles_per_feed = 250
|
||||
@ -28,7 +29,7 @@ class FinancialTimes(BasicNewsRecipe):
|
||||
masthead_url = 'http://im.media.ft.com/m/img/masthead_main.jpg'
|
||||
LOGIN = 'https://registration.ft.com/registration/barrier/login'
|
||||
LOGIN2 = 'http://media.ft.com/h/subs3.html'
|
||||
INDEX = 'http://www.ft.com/intl/us-edition'
|
||||
INDEX = 'http://www.ft.com/intl/international-edition'
|
||||
PREFIX = 'http://www.ft.com'
|
||||
|
||||
conversion_options = {
|
||||
@ -93,7 +94,7 @@ class FinancialTimes(BasicNewsRecipe):
|
||||
try:
|
||||
urlverified = self.browser.open_novisit(url).geturl() # resolve redirect.
|
||||
except:
|
||||
continue
|
||||
continue
|
||||
title = self.tag_to_string(item)
|
||||
date = strftime(self.timefmt)
|
||||
articles.append({
|
||||
@ -105,29 +106,30 @@ class FinancialTimes(BasicNewsRecipe):
|
||||
return articles
|
||||
|
||||
def parse_index(self):
|
||||
feeds = []
|
||||
feeds = OrderedDict()
|
||||
soup = self.index_to_soup(self.INDEX)
|
||||
dates= self.tag_to_string(soup.find('div', attrs={'class':'btm-links'}).find('div'))
|
||||
self.timefmt = ' [%s]'%dates
|
||||
wide = soup.find('div',attrs={'class':'wide'})
|
||||
if not wide:
|
||||
return feeds
|
||||
allsections = wide.findAll(attrs={'class':lambda x: x and 'footwell' in x.split()})
|
||||
if not allsections:
|
||||
return feeds
|
||||
count = 0
|
||||
for item in allsections:
|
||||
count = count + 1
|
||||
if self.test and count > 2:
|
||||
return feeds
|
||||
fitem = item.h3
|
||||
if not fitem:
|
||||
fitem = item.h4
|
||||
ftitle = self.tag_to_string(fitem)
|
||||
self.report_progress(0, _('Fetching feed')+' %s...'%(ftitle))
|
||||
feedarts = self.get_artlinks(item.ul)
|
||||
feeds.append((ftitle,feedarts))
|
||||
return feeds
|
||||
#dates= self.tag_to_string(soup.find('div', attrs={'class':'btm-links'}).find('div'))
|
||||
#self.timefmt = ' [%s]'%dates
|
||||
section_title = 'Untitled'
|
||||
|
||||
for column in soup.findAll('div', attrs = {'class':'feedBoxes clearfix'}):
|
||||
for section in column. findAll('div', attrs = {'class':'feedBox'}):
|
||||
sectiontitle=self.tag_to_string(section.find('h4'))
|
||||
if '...' not in sectiontitle: section_title=sectiontitle
|
||||
for article in section.ul.findAll('li'):
|
||||
articles = []
|
||||
title=self.tag_to_string(article.a)
|
||||
url=article.a['href']
|
||||
articles.append({'title':title, 'url':url, 'description':'', 'date':''})
|
||||
|
||||
if articles:
|
||||
if section_title not in feeds:
|
||||
feeds[section_title] = []
|
||||
feeds[section_title] += articles
|
||||
|
||||
|
||||
ans = [(key, val) for key, val in feeds.iteritems()]
|
||||
return ans
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
items = ['promo-box','promo-title',
|
||||
@ -174,9 +176,6 @@ class FinancialTimes(BasicNewsRecipe):
|
||||
count += 1
|
||||
tfile = PersistentTemporaryFile('_fa.html')
|
||||
tfile.write(html)
|
||||
tfile.close()
|
||||
tfile.close()
|
||||
self.temp_files.append(tfile)
|
||||
return tfile.name
|
||||
|
||||
def cleanup(self):
|
||||
self.browser.open('https://registration.ft.com/registration/login/logout?location=')
|
53
recipes/forbes_pl.recipe
Normal file
@ -0,0 +1,53 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import datetime
|
||||
import re
|
||||
|
||||
class forbes_pl(BasicNewsRecipe):
|
||||
title = u'Forbes.pl'
|
||||
__author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
|
||||
language = 'pl'
|
||||
description = u'Biznes, finanse, gospodarka, strategie, wiadomości gospodarcze, analizy finasowe i strategiczne.'
|
||||
oldest_article = 1
|
||||
index = 'http://www.forbes.pl'
|
||||
cover_url = 'http://www.forbes.pl/resources/front/images/logo.png'
|
||||
max_articles_per_feed = 100
|
||||
extra_css = '.Block-Photo {float:left; max-width: 300px; margin-right: 5px;}'
|
||||
preprocess_regexps = [(re.compile(ur'<p>(<strong>)?(Czytaj|Zobacz) (też|także):.*?</p>', re.DOTALL), lambda match: ''), (re.compile(ur'<strong>Zobacz:.*?</strong>', re.DOTALL), lambda match: '')]
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
now = datetime.datetime.now()
|
||||
yesterday = now - datetime.timedelta(hours=24)
|
||||
yesterday = yesterday.strftime("%d.%m.%Y %H:%M:%S")
|
||||
pages_count = 4
|
||||
keep_only_tags = [dict(attrs={'class':['Block-Node Content-Article ', 'Block-Node Content-Article piano-closed']})]
|
||||
remove_tags = [dict(attrs={'class':['Keywords Styled', 'twitter-share-button', 'Block-List-Related Block-List']})]
|
||||
|
||||
feeds = [(u'Wszystkie', 'http://www.forbes.pl/rss')]
|
||||
|
||||
'''def preprocess_html(self, soup):
|
||||
self.append_page(soup, soup.body)
|
||||
return soup
|
||||
|
||||
|
||||
def append_page(self, soup, appendtag):
|
||||
cleanup = False
|
||||
nexturl = appendtag.find('a', attrs={'class':'next'})
|
||||
if nexturl:
|
||||
cleanup = True
|
||||
while nexturl:
|
||||
soup2 = self.index_to_soup(self.index + nexturl['href'])
|
||||
nexturl = soup2.find('a', attrs={'class':'next'})
|
||||
pagetext = soup2.findAll(id='article-body-wrapper')
|
||||
if not pagetext:
|
||||
pagetext = soup2.findAll(attrs={'class':'Article-Entry Styled'})
|
||||
for comment in pagetext.findAll(text=lambda text:isinstance(text, Comment)):
|
||||
comment.extract()
|
||||
pos = len(appendtag.contents)
|
||||
appendtag.insert(pos, pagetext)
|
||||
if cleanup:
|
||||
for r in appendtag.findAll(attrs={'class':'paginator'}):
|
||||
r.extract()'''
|
@ -6,6 +6,7 @@ __copyright__ = u'2010-2013, Tomasz Dlugosz <tomek3d@gmail.com>'
|
||||
fronda.pl
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from datetime import timedelta, date
|
||||
|
||||
@ -23,6 +24,7 @@ class Fronda(BasicNewsRecipe):
|
||||
extra_css = '''
|
||||
h1 {font-size:150%}
|
||||
.body {text-align:left;}
|
||||
div#featured-image {font-style:italic; font-size:70%}
|
||||
'''
|
||||
|
||||
earliest_date = date.today() - timedelta(days=oldest_article)
|
||||
@ -55,7 +57,10 @@ class Fronda(BasicNewsRecipe):
|
||||
articles = {}
|
||||
|
||||
for url, genName in genres:
|
||||
soup = self.index_to_soup('http://www.fronda.pl/c/'+ url)
|
||||
try:
|
||||
soup = self.index_to_soup('http://www.fronda.pl/c/'+ url)
|
||||
except:
|
||||
continue
|
||||
articles[genName] = []
|
||||
for item in soup.findAll('li'):
|
||||
article_h = item.find('h2')
|
||||
@ -77,16 +82,15 @@ class Fronda(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':['related-articles',
|
||||
'button right',
|
||||
'pagination']}),
|
||||
dict(name='div', attrs={'class':['related-articles','button right','pagination','related-articles content']}),
|
||||
dict(name='h3', attrs={'class':'block-header article comments'}),
|
||||
dict(name='ul', attrs={'class':'comment-list'}),
|
||||
dict(name='ul', attrs={'class':'category'}),
|
||||
dict(name='ul', attrs={'class':'tag-list'}),
|
||||
dict(name='ul', attrs={'class':['comment-list','category','tag-list']}),
|
||||
dict(name='p', attrs={'id':'comments-disclaimer'}),
|
||||
dict(name='div', attrs={'style':'text-align: left; margin-bottom: 15px;'}),
|
||||
dict(name='div', attrs={'style':'text-align: left; margin-top: 15px; margin-bottom: 30px;'}),
|
||||
dict(name='div', attrs={'class':'related-articles content'}),
|
||||
dict(name='div', attrs={'id':'comment-form'})
|
||||
dict(name='div', attrs={'id':'comment-form'}),
|
||||
dict(name='span', attrs={'class':'separator'})
|
||||
]
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'komentarzy: .*?</h6>', re.IGNORECASE | re.DOTALL | re.M ), lambda match: '</h6>')]
|
||||
|
108
recipes/galaxys_edge.recipe
Normal file
@ -0,0 +1,108 @@
|
||||
from __future__ import with_statement
|
||||
__license__ = 'GPL 3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class GalaxyEdge(BasicNewsRecipe):
|
||||
title = u'The Galaxy\'s Edge'
|
||||
language = 'en'
|
||||
|
||||
oldest_article = 7
|
||||
__author__ = 'Krittika Goyal'
|
||||
no_stylesheets = True
|
||||
|
||||
auto_cleanup = True
|
||||
|
||||
#keep_only_tags = [dict(id='content')]
|
||||
#remove_tags = [dict(attrs={'class':['article-links', 'breadcr']}),
|
||||
#dict(id=['email-section', 'right-column', 'printfooter', 'topover',
|
||||
#'slidebox', 'th_footer'])]
|
||||
|
||||
extra_css = '.photo-caption { font-size: smaller }'
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.index_to_soup('http://www.galaxysedge.com/')
|
||||
main = soup.find('table', attrs={'width':'911'})
|
||||
toc = main.find('td', attrs={'width':'225'})
|
||||
|
||||
|
||||
|
||||
current_section = None
|
||||
current_articles = []
|
||||
feeds = []
|
||||
c = 0
|
||||
for x in toc.findAll(['p']):
|
||||
c = c+1
|
||||
if c == 5:
|
||||
if current_articles and current_section:
|
||||
feeds.append((current_section, current_articles))
|
||||
edwo = x.find('a')
|
||||
current_section = self.tag_to_string(edwo)
|
||||
current_articles = []
|
||||
self.log('\tFound section:', current_section)
|
||||
title = self.tag_to_string(edwo)
|
||||
url = edwo.get('href', True)
|
||||
url = 'http://www.galaxysedge.com/'+url
|
||||
print(title)
|
||||
print(c)
|
||||
if not url or not title:
|
||||
continue
|
||||
self.log('\t\tFound article:', title)
|
||||
self.log('\t\t\t', url)
|
||||
current_articles.append({'title': title, 'url':url,
|
||||
'description':'', 'date':''})
|
||||
elif c>5:
|
||||
current_section = self.tag_to_string(x.find('b'))
|
||||
current_articles = []
|
||||
self.log('\tFound section:', current_section)
|
||||
for y in x.findAll('a'):
|
||||
title = self.tag_to_string(y)
|
||||
url = y.get('href', True)
|
||||
url = 'http://www.galaxysedge.com/'+url
|
||||
print(title)
|
||||
if not url or not title:
|
||||
continue
|
||||
self.log('\t\tFound article:', title)
|
||||
self.log('\t\t\t', url)
|
||||
current_articles.append({'title': title, 'url':url,
|
||||
'description':'', 'date':''})
|
||||
if current_articles and current_section:
|
||||
feeds.append((current_section, current_articles))
|
||||
|
||||
return feeds
|
||||
|
||||
|
||||
|
||||
|
||||
#def preprocess_raw_html(self, raw, url):
|
||||
#return raw.replace('<body><p>', '<p>').replace('</p></body>', '</p>')
|
||||
|
||||
#def postprocess_html(self, soup, first_fetch):
|
||||
#for t in soup.findAll(['table', 'tr', 'td','center']):
|
||||
#t.name = 'div'
|
||||
#return soup
|
||||
|
||||
#def parse_index(self):
|
||||
#today = time.strftime('%Y-%m-%d')
|
||||
#soup = self.index_to_soup(
|
||||
#'http://www.thehindu.com/todays-paper/tp-index/?date=' + today)
|
||||
#div = soup.find(id='left-column')
|
||||
#feeds = []
|
||||
#current_section = None
|
||||
#current_articles = []
|
||||
#for x in div.findAll(['h3', 'div']):
|
||||
#if current_section and x.get('class', '') == 'tpaper':
|
||||
#a = x.find('a', href=True)
|
||||
#if a is not None:
|
||||
#current_articles.append({'url':a['href']+'?css=print',
|
||||
#'title':self.tag_to_string(a), 'date': '',
|
||||
#'description':''})
|
||||
#if x.name == 'h3':
|
||||
#if current_section and current_articles:
|
||||
#feeds.append((current_section, current_articles))
|
||||
#current_section = self.tag_to_string(x)
|
||||
#current_articles = []
|
||||
#return feeds
|
||||
|
||||
|
@ -10,7 +10,7 @@ krakow.gazeta.pl
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class gw_krakow(BasicNewsRecipe):
|
||||
title = u'Gazeta.pl Kraków'
|
||||
title = u'Gazeta Wyborcza Kraków'
|
||||
__author__ = 'teepel <teepel44@gmail.com> based on GW from fenuks'
|
||||
language = 'pl'
|
||||
description =u'Wiadomości z Krakowa na portalu Gazeta.pl.'
|
||||
|
@ -5,7 +5,7 @@ import string
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class GazetaPlSzczecin(BasicNewsRecipe):
|
||||
title = u'Gazeta.pl Szczecin'
|
||||
title = u'Gazeta Wyborcza Szczecin'
|
||||
description = u'Wiadomości ze Szczecina na portalu Gazeta.pl.'
|
||||
__author__ = u'Michał Szkutnik'
|
||||
__license__ = u'GPL v3'
|
||||
|
@ -10,7 +10,7 @@ warszawa.gazeta.pl
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class gw_wawa(BasicNewsRecipe):
|
||||
title = u'Gazeta.pl Warszawa'
|
||||
title = u'Gazeta Wyborcza Warszawa'
|
||||
__author__ = 'teepel <teepel44@gmail.com> based on GW from fenuks'
|
||||
language = 'pl'
|
||||
description ='Wiadomości z Warszawy na portalu Gazeta.pl.'
|
||||
|
@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Comment
|
||||
|
||||
class Gazeta_Wyborcza(BasicNewsRecipe):
|
||||
title = u'Gazeta.pl'
|
||||
title = u'Gazeta Wyborcza'
|
||||
__author__ = 'fenuks, Artur Stachecki'
|
||||
language = 'pl'
|
||||
description = 'Wiadomości z Polski i ze świata. Serwisy tematyczne i lokalne w 20 miastach.'
|
||||
|
@ -1,90 +0,0 @@
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class GiveMeSomethingToRead(BasicNewsRecipe):
|
||||
title = u'Give Me Something To Read'
|
||||
description = 'Curation / aggregation of articles on diverse topics'
|
||||
language = 'en'
|
||||
__author__ = 'barty on mobileread.com forum'
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = False
|
||||
timefmt = ' [%a, %d %b, %Y]'
|
||||
oldest_article = 365
|
||||
auto_cleanup = True
|
||||
INDEX = 'http://givemesomethingtoread.com'
|
||||
CATEGORIES = [
|
||||
# comment out categories you don't want
|
||||
# (user friendly name, system name, max number of articles to load)
|
||||
('The Arts','arts',25),
|
||||
('Science','science',30),
|
||||
('Technology','technology',30),
|
||||
('Politics','politics',20),
|
||||
('Media','media',30),
|
||||
('Crime','crime',15),
|
||||
('Other articles','',10)
|
||||
]
|
||||
|
||||
def parse_index(self):
|
||||
self.cover_url = 'http://thegretchenshow.files.wordpress.com/2009/12/well-read-cat-small.jpg'
|
||||
feeds = []
|
||||
seen_urls = set([])
|
||||
regex = re.compile( r'http://(www\.)?([^/:]+)', re.I)
|
||||
|
||||
for category in self.CATEGORIES:
|
||||
|
||||
(cat_name, tag, max_articles) = category
|
||||
|
||||
tagurl = '' if tag=='' else '/tagged/'+tag
|
||||
self.log('Reading category:', cat_name)
|
||||
|
||||
articles = []
|
||||
pageno = 1
|
||||
|
||||
while len(articles) < max_articles and pageno < 100:
|
||||
|
||||
page = "%s%s/page/%d" % (self.INDEX, tagurl, pageno) if pageno > 1 else self.INDEX + tagurl
|
||||
pageno += 1
|
||||
|
||||
self.log('\tReading page:', page)
|
||||
try:
|
||||
soup = self.index_to_soup(page)
|
||||
except:
|
||||
break
|
||||
|
||||
headers = soup.findAll('h2')
|
||||
if len(headers) == 0:
|
||||
break
|
||||
|
||||
for header in headers:
|
||||
atag = header.find('a')
|
||||
url = atag['href']
|
||||
# skip promotionals and duplicate
|
||||
if url.startswith('http://givemesomethingtoread') or url.startswith('/') or url in seen_urls:
|
||||
continue
|
||||
seen_urls.add(url)
|
||||
title = self.tag_to_string(header)
|
||||
self.log('\tFound article:', title)
|
||||
#self.log('\t', url)
|
||||
desc = header.parent.find('blockquote')
|
||||
desc = self.tag_to_string(desc) if desc else ''
|
||||
m = regex.match( url)
|
||||
if m:
|
||||
desc = "[%s] %s" % (m.group(2), desc)
|
||||
#self.log('\t', desc)
|
||||
date = ''
|
||||
p = header.parent.previousSibling
|
||||
# navigate up to find h3, which contains the date
|
||||
while p:
|
||||
if hasattr(p,'name') and p.name == 'h3':
|
||||
date = self.tag_to_string(p)
|
||||
break
|
||||
p = p.previousSibling
|
||||
articles.append({'title':title,'url':url,'description':desc,'date':date})
|
||||
if len(articles) >= max_articles:
|
||||
break
|
||||
|
||||
if articles:
|
||||
feeds.append((cat_name, articles))
|
||||
|
||||
return feeds
|
||||
|
@ -1,448 +1,229 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = 'Copyright 2010 Starson17'
|
||||
'''
|
||||
www.gocomics.com
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import mechanize, re
|
||||
|
||||
class GoComics(BasicNewsRecipe):
|
||||
title = 'GoComics'
|
||||
|
||||
class Comics(BasicNewsRecipe):
|
||||
title = 'Comics.com'
|
||||
__author__ = 'Starson17'
|
||||
__version__ = '1.06'
|
||||
__date__ = '07 June 2011'
|
||||
description = u'200+ Comics - Customize for more days/comics: Defaults to 7 days, 25 comics - 20 general, 5 editorial.'
|
||||
category = 'news, comics'
|
||||
description = 'Comics from comics.com. You should customize this recipe to fetch only the comics you are interested in'
|
||||
language = 'en'
|
||||
use_embedded_content= False
|
||||
no_stylesheets = True
|
||||
oldest_article = 24
|
||||
remove_javascript = True
|
||||
cover_url = 'http://paulbuckley14059.files.wordpress.com/2008/06/calvin-and-hobbes.jpg'
|
||||
remove_attributes = ['style']
|
||||
|
||||
####### USER PREFERENCES - COMICS, IMAGE SIZE AND NUMBER OF COMICS TO RETRIEVE ########
|
||||
# num_comics_to_get - I've tried up to 99 on Calvin&Hobbes
|
||||
cover_url = 'http://www.bsb.lib.tx.us/images/comics.com.gif'
|
||||
recursions = 0
|
||||
max_articles_per_feed = 10
|
||||
num_comics_to_get = 7
|
||||
# comic_size 300 is small, 600 is medium, 900 is large, 1500 is extra-large
|
||||
comic_size = 900
|
||||
# CHOOSE COMIC STRIPS BELOW - REMOVE COMMENT '# ' FROM IN FRONT OF DESIRED STRIPS
|
||||
# Please do not overload their servers by selecting all comics and 1000 strips from each!
|
||||
simultaneous_downloads = 1
|
||||
# delay = 3
|
||||
|
||||
conversion_options = {'linearize_tables' : True
|
||||
, 'comment' : description
|
||||
, 'tags' : category
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['feature','banner']}),
|
||||
keep_only_tags = [dict(name='h1'),
|
||||
dict(name='p', attrs={'class':'feature_item'})
|
||||
]
|
||||
|
||||
remove_tags = [dict(name='a', attrs={'class':['beginning','prev','cal','next','newest']}),
|
||||
dict(name='div', attrs={'class':['tag-wrapper']}),
|
||||
dict(name='a', attrs={'href':re.compile(r'.*mutable_[0-9]+', re.IGNORECASE)}),
|
||||
dict(name='img', attrs={'src':re.compile(r'.*mutable_[0-9]+', re.IGNORECASE)}),
|
||||
dict(name='ul', attrs={'class':['share-nav','feature-nav']}),
|
||||
]
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
cookies = mechanize.CookieJar()
|
||||
br = mechanize.build_opener(mechanize.HTTPCookieProcessor(cookies))
|
||||
br.addheaders = [('Referer','http://www.gocomics.com/')]
|
||||
return br
|
||||
|
||||
def parse_index(self):
|
||||
feeds = []
|
||||
for title, url in [
|
||||
(u"2 Cows and a Chicken", u"http://www.gocomics.com/2cowsandachicken"),
|
||||
#(u"9 Chickweed Lane", u"http://www.gocomics.com/9chickweedlane"),
|
||||
(u"9 to 5", u"http://www.gocomics.com/9to5"),
|
||||
#(u"Adam At Home", u"http://www.gocomics.com/adamathome"),
|
||||
(u"Agnes", u"http://www.gocomics.com/agnes"),
|
||||
#(u"Alley Oop", u"http://www.gocomics.com/alleyoop"),
|
||||
#(u"Andy Capp", u"http://www.gocomics.com/andycapp"),
|
||||
#(u"Animal Crackers", u"http://www.gocomics.com/animalcrackers"),
|
||||
#(u"Annie", u"http://www.gocomics.com/annie"),
|
||||
#(u"Arlo & Janis", u"http://www.gocomics.com/arloandjanis"),
|
||||
#(u"Ask Shagg", u"http://www.gocomics.com/askshagg"),
|
||||
(u"B.C.", u"http://www.gocomics.com/bc"),
|
||||
#(u"Back in the Day", u"http://www.gocomics.com/backintheday"),
|
||||
#(u"Bad Reporter", u"http://www.gocomics.com/badreporter"),
|
||||
#(u"Baldo", u"http://www.gocomics.com/baldo"),
|
||||
#(u"Ballard Street", u"http://www.gocomics.com/ballardstreet"),
|
||||
#(u"Barkeater Lake", u"http://www.gocomics.com/barkeaterlake"),
|
||||
#(u"Basic Instructions", u"http://www.gocomics.com/basicinstructions"),
|
||||
#(u"Ben", u"http://www.gocomics.com/ben"),
|
||||
#(u"Betty", u"http://www.gocomics.com/betty"),
|
||||
#(u"Bewley", u"http://www.gocomics.com/bewley"),
|
||||
#(u"Big Nate", u"http://www.gocomics.com/bignate"),
|
||||
#(u"Big Top", u"http://www.gocomics.com/bigtop"),
|
||||
#(u"Biographic", u"http://www.gocomics.com/biographic"),
|
||||
#(u"Birdbrains", u"http://www.gocomics.com/birdbrains"),
|
||||
#(u"Bleeker: The Rechargeable Dog", u"http://www.gocomics.com/bleeker"),
|
||||
#(u"Bliss", u"http://www.gocomics.com/bliss"),
|
||||
(u"Bloom County", u"http://www.gocomics.com/bloomcounty"),
|
||||
#(u"Bo Nanas", u"http://www.gocomics.com/bonanas"),
|
||||
#(u"Bob the Squirrel", u"http://www.gocomics.com/bobthesquirrel"),
|
||||
#(u"Boomerangs", u"http://www.gocomics.com/boomerangs"),
|
||||
#(u"Bottomliners", u"http://www.gocomics.com/bottomliners"),
|
||||
#(u"Bound and Gagged", u"http://www.gocomics.com/boundandgagged"),
|
||||
#(u"Brainwaves", u"http://www.gocomics.com/brainwaves"),
|
||||
#(u"Brenda Starr", u"http://www.gocomics.com/brendastarr"),
|
||||
#(u"Brevity", u"http://www.gocomics.com/brevity"),
|
||||
#(u"Brewster Rockit", u"http://www.gocomics.com/brewsterrockit"),
|
||||
#(u"Broom Hilda", u"http://www.gocomics.com/broomhilda"),
|
||||
(u"Calvin and Hobbes", u"http://www.gocomics.com/calvinandhobbes"),
|
||||
#(u"Candorville", u"http://www.gocomics.com/candorville"),
|
||||
#(u"Cathy", u"http://www.gocomics.com/cathy"),
|
||||
#(u"C'est la Vie", u"http://www.gocomics.com/cestlavie"),
|
||||
#(u"Cheap Thrills", u"http://www.gocomics.com/cheapthrills"),
|
||||
#(u"Chuckle Bros", u"http://www.gocomics.com/chucklebros"),
|
||||
#(u"Citizen Dog", u"http://www.gocomics.com/citizendog"),
|
||||
#(u"Cleats", u"http://www.gocomics.com/cleats"),
|
||||
#(u"Close to Home", u"http://www.gocomics.com/closetohome"),
|
||||
#(u"Committed", u"http://www.gocomics.com/committed"),
|
||||
#(u"Compu-toon", u"http://www.gocomics.com/compu-toon"),
|
||||
#(u"Cornered", u"http://www.gocomics.com/cornered"),
|
||||
#(u"Cow & Boy", u"http://www.gocomics.com/cow&boy"),
|
||||
#(u"Cul de Sac", u"http://www.gocomics.com/culdesac"),
|
||||
#(u"Daddy's Home", u"http://www.gocomics.com/daddyshome"),
|
||||
#(u"Deep Cover", u"http://www.gocomics.com/deepcover"),
|
||||
#(u"Dick Tracy", u"http://www.gocomics.com/dicktracy"),
|
||||
(u"Dog Eat Doug", u"http://www.gocomics.com/dogeatdoug"),
|
||||
#(u"Domestic Abuse", u"http://www.gocomics.com/domesticabuse"),
|
||||
(u"Doodles", u"http://www.gocomics.com/doodles"),
|
||||
(u"Doonesbury", u"http://www.gocomics.com/doonesbury"),
|
||||
#(u"Drabble", u"http://www.gocomics.com/drabble"),
|
||||
#(u"Eek!", u"http://www.gocomics.com/eek"),
|
||||
#(u"F Minus", u"http://www.gocomics.com/fminus"),
|
||||
#(u"Family Tree", u"http://www.gocomics.com/familytree"),
|
||||
#(u"Farcus", u"http://www.gocomics.com/farcus"),
|
||||
(u"Fat Cats Classics", u"http://www.gocomics.com/fatcatsclassics"),
|
||||
#(u"Ferd'nand", u"http://www.gocomics.com/ferdnand"),
|
||||
#(u"Flight Deck", u"http://www.gocomics.com/flightdeck"),
|
||||
(u"Flo and Friends", u"http://www.gocomics.com/floandfriends"),
|
||||
#(u"For Better or For Worse", u"http://www.gocomics.com/forbetterorforworse"),
|
||||
#(u"For Heaven's Sake", u"http://www.gocomics.com/forheavenssake"),
|
||||
#(u"Fort Knox", u"http://www.gocomics.com/fortknox"),
|
||||
#(u"FoxTrot Classics", u"http://www.gocomics.com/foxtrotclassics"),
|
||||
(u"FoxTrot", u"http://www.gocomics.com/foxtrot"),
|
||||
#(u"Frank & Ernest", u"http://www.gocomics.com/frankandernest"),
|
||||
#(u"Frazz", u"http://www.gocomics.com/frazz"),
|
||||
#(u"Fred Basset", u"http://www.gocomics.com/fredbasset"),
|
||||
#(u"Free Range", u"http://www.gocomics.com/freerange"),
|
||||
#(u"Frog Applause", u"http://www.gocomics.com/frogapplause"),
|
||||
#(u"Garfield Minus Garfield", u"http://www.gocomics.com/garfieldminusgarfield"),
|
||||
(u"Garfield", u"http://www.gocomics.com/garfield"),
|
||||
#(u"Gasoline Alley", u"http://www.gocomics.com/gasolinealley"),
|
||||
#(u"Geech Classics", u"http://www.gocomics.com/geechclassics"),
|
||||
#(u"Get Fuzzy", u"http://www.gocomics.com/getfuzzy"),
|
||||
#(u"Gil Thorp", u"http://www.gocomics.com/gilthorp"),
|
||||
#(u"Ginger Meggs", u"http://www.gocomics.com/gingermeggs"),
|
||||
#(u"Girls & Sports", u"http://www.gocomics.com/girlsandsports"),
|
||||
#(u"Graffiti", u"http://www.gocomics.com/graffiti"),
|
||||
#(u"Grand Avenue", u"http://www.gocomics.com/grandavenue"),
|
||||
#(u"Haiku Ewe", u"http://www.gocomics.com/haikuewe"),
|
||||
#(u"Heart of the City", u"http://www.gocomics.com/heartofthecity"),
|
||||
(u"Heathcliff", u"http://www.gocomics.com/heathcliff"),
|
||||
#(u"Herb and Jamaal", u"http://www.gocomics.com/herbandjamaal"),
|
||||
#(u"Herman", u"http://www.gocomics.com/herman"),
|
||||
#(u"Home and Away", u"http://www.gocomics.com/homeandaway"),
|
||||
#(u"Housebroken", u"http://www.gocomics.com/housebroken"),
|
||||
#(u"Hubert and Abby", u"http://www.gocomics.com/hubertandabby"),
|
||||
#(u"Imagine This", u"http://www.gocomics.com/imaginethis"),
|
||||
#(u"In the Bleachers", u"http://www.gocomics.com/inthebleachers"),
|
||||
#(u"In the Sticks", u"http://www.gocomics.com/inthesticks"),
|
||||
#(u"Ink Pen", u"http://www.gocomics.com/inkpen"),
|
||||
#(u"It's All About You", u"http://www.gocomics.com/itsallaboutyou"),
|
||||
#(u"Jane's World", u"http://www.gocomics.com/janesworld"),
|
||||
#(u"Joe Vanilla", u"http://www.gocomics.com/joevanilla"),
|
||||
#(u"Jump Start", u"http://www.gocomics.com/jumpstart"),
|
||||
#(u"Kit 'N' Carlyle", u"http://www.gocomics.com/kitandcarlyle"),
|
||||
#(u"La Cucaracha", u"http://www.gocomics.com/lacucaracha"),
|
||||
#(u"Last Kiss", u"http://www.gocomics.com/lastkiss"),
|
||||
#(u"Legend of Bill", u"http://www.gocomics.com/legendofbill"),
|
||||
#(u"Liberty Meadows", u"http://www.gocomics.com/libertymeadows"),
|
||||
#(u"Li'l Abner Classics", u"http://www.gocomics.com/lilabnerclassics"),
|
||||
#(u"Lio", u"http://www.gocomics.com/lio"),
|
||||
#(u"Little Dog Lost", u"http://www.gocomics.com/littledoglost"),
|
||||
#(u"Little Otto", u"http://www.gocomics.com/littleotto"),
|
||||
#(u"Lola", u"http://www.gocomics.com/lola"),
|
||||
#(u"Loose Parts", u"http://www.gocomics.com/looseparts"),
|
||||
#(u"Love Is...", u"http://www.gocomics.com/loveis"),
|
||||
#(u"Luann", u"http://www.gocomics.com/luann"),
|
||||
#(u"Maintaining", u"http://www.gocomics.com/maintaining"),
|
||||
(u"Marmaduke", u"http://www.gocomics.com/marmaduke"),
|
||||
#(u"Meg! Classics", u"http://www.gocomics.com/megclassics"),
|
||||
#(u"Middle-Aged White Guy", u"http://www.gocomics.com/middleagedwhiteguy"),
|
||||
#(u"Minimum Security", u"http://www.gocomics.com/minimumsecurity"),
|
||||
#(u"Moderately Confused", u"http://www.gocomics.com/moderatelyconfused"),
|
||||
(u"Momma", u"http://www.gocomics.com/momma"),
|
||||
#(u"Monty", u"http://www.gocomics.com/monty"),
|
||||
#(u"Motley Classics", u"http://www.gocomics.com/motleyclassics"),
|
||||
(u"Mutt & Jeff", u"http://www.gocomics.com/muttandjeff"),
|
||||
#(u"Mythtickle", u"http://www.gocomics.com/mythtickle"),
|
||||
#(u"Nancy", u"http://www.gocomics.com/nancy"),
|
||||
#(u"Natural Selection", u"http://www.gocomics.com/naturalselection"),
|
||||
#(u"Nest Heads", u"http://www.gocomics.com/nestheads"),
|
||||
#(u"NEUROTICA", u"http://www.gocomics.com/neurotica"),
|
||||
#(u"New Adventures of Queen Victoria", u"http://www.gocomics.com/thenewadventuresofqueenvictoria"),
|
||||
#(u"Non Sequitur", u"http://www.gocomics.com/nonsequitur"),
|
||||
#(u"Off The Mark", u"http://www.gocomics.com/offthemark"),
|
||||
#(u"On A Claire Day", u"http://www.gocomics.com/onaclaireday"),
|
||||
#(u"One Big Happy Classics", u"http://www.gocomics.com/onebighappyclassics"),
|
||||
#(u"One Big Happy", u"http://www.gocomics.com/onebighappy"),
|
||||
#(u"Out of the Gene Pool Re-Runs", u"http://www.gocomics.com/outofthegenepool"),
|
||||
#(u"Over the Hedge", u"http://www.gocomics.com/overthehedge"),
|
||||
#(u"Overboard", u"http://www.gocomics.com/overboard"),
|
||||
#(u"PC and Pixel", u"http://www.gocomics.com/pcandpixel"),
|
||||
(u"Peanuts", u"http://www.gocomics.com/peanuts"),
|
||||
#(u"Pearls Before Swine", u"http://www.gocomics.com/pearlsbeforeswine"),
|
||||
#(u"Pibgorn Sketches", u"http://www.gocomics.com/pibgornsketches"),
|
||||
#(u"Pibgorn", u"http://www.gocomics.com/pibgorn"),
|
||||
(u"Pickles", u"http://www.gocomics.com/pickles"),
|
||||
#(u"Pinkerton", u"http://www.gocomics.com/pinkerton"),
|
||||
#(u"Pluggers", u"http://www.gocomics.com/pluggers"),
|
||||
#(u"Pooch Cafe", u"http://www.gocomics.com/poochcafe"),
|
||||
#(u"PreTeena", u"http://www.gocomics.com/preteena"),
|
||||
#(u"Prickly City", u"http://www.gocomics.com/pricklycity"),
|
||||
#(u"Rabbits Against Magic", u"http://www.gocomics.com/rabbitsagainstmagic"),
|
||||
#(u"Raising Duncan Classics", u"http://www.gocomics.com/raisingduncanclassics"),
|
||||
#(u"Real Life Adventures", u"http://www.gocomics.com/reallifeadventures"),
|
||||
#(u"Reality Check", u"http://www.gocomics.com/realitycheck"),
|
||||
#(u"Red and Rover", u"http://www.gocomics.com/redandrover"),
|
||||
#(u"Red Meat", u"http://www.gocomics.com/redmeat"),
|
||||
#(u"Reynolds Unwrapped", u"http://www.gocomics.com/reynoldsunwrapped"),
|
||||
#(u"Rip Haywire", u"http://www.gocomics.com/riphaywire"),
|
||||
#(u"Ripley's Believe It or Not!", u"http://www.gocomics.com/ripleysbelieveitornot"),
|
||||
#(u"Ronaldinho Gaucho", u"http://www.gocomics.com/ronaldinhogaucho"),
|
||||
#(u"Rose Is Rose", u"http://www.gocomics.com/roseisrose"),
|
||||
#(u"Rubes", u"http://www.gocomics.com/rubes"),
|
||||
#(u"Rudy Park", u"http://www.gocomics.com/rudypark"),
|
||||
#(u"Scary Gary", u"http://www.gocomics.com/scarygary"),
|
||||
#(u"Shirley and Son Classics", u"http://www.gocomics.com/shirleyandsonclassics"),
|
||||
#(u"Shoe", u"http://www.gocomics.com/shoe"),
|
||||
#(u"Shoecabbage", u"http://www.gocomics.com/shoecabbage"),
|
||||
#(u"Skin Horse", u"http://www.gocomics.com/skinhorse"),
|
||||
#(u"Slowpoke", u"http://www.gocomics.com/slowpoke"),
|
||||
#(u"Soup To Nutz", u"http://www.gocomics.com/souptonutz"),
|
||||
#(u"Speed Bump", u"http://www.gocomics.com/speedbump"),
|
||||
#(u"Spot The Frog", u"http://www.gocomics.com/spotthefrog"),
|
||||
#(u"State of the Union", u"http://www.gocomics.com/stateoftheunion"),
|
||||
#(u"Stone Soup", u"http://www.gocomics.com/stonesoup"),
|
||||
#(u"Strange Brew", u"http://www.gocomics.com/strangebrew"),
|
||||
#(u"Sylvia", u"http://www.gocomics.com/sylvia"),
|
||||
#(u"Tank McNamara", u"http://www.gocomics.com/tankmcnamara"),
|
||||
#(u"Tarzan Classics", u"http://www.gocomics.com/tarzanclassics"),
|
||||
#(u"That's Life", u"http://www.gocomics.com/thatslife"),
|
||||
#(u"The Academia Waltz", u"http://www.gocomics.com/academiawaltz"),
|
||||
#(u"The Argyle Sweater", u"http://www.gocomics.com/theargylesweater"),
|
||||
#(u"The Barn", u"http://www.gocomics.com/thebarn"),
|
||||
#(u"The Boiling Point", u"http://www.gocomics.com/theboilingpoint"),
|
||||
#(u"The Boondocks", u"http://www.gocomics.com/boondocks"),
|
||||
#(u"The Born Loser", u"http://www.gocomics.com/thebornloser"),
|
||||
#(u"The Buckets", u"http://www.gocomics.com/thebuckets"),
|
||||
#(u"The City", u"http://www.gocomics.com/thecity"),
|
||||
#(u"The Dinette Set", u"http://www.gocomics.com/dinetteset"),
|
||||
#(u"The Doozies", u"http://www.gocomics.com/thedoozies"),
|
||||
#(u"The Duplex", u"http://www.gocomics.com/duplex"),
|
||||
#(u"The Elderberries", u"http://www.gocomics.com/theelderberries"),
|
||||
#(u"The Flying McCoys", u"http://www.gocomics.com/theflyingmccoys"),
|
||||
#(u"The Fusco Brothers", u"http://www.gocomics.com/thefuscobrothers"),
|
||||
#(u"The Grizzwells", u"http://www.gocomics.com/thegrizzwells"),
|
||||
#(u"The Humble Stumble", u"http://www.gocomics.com/thehumblestumble"),
|
||||
#(u"The Knight Life", u"http://www.gocomics.com/theknightlife"),
|
||||
#(u"The Meaning of Lila", u"http://www.gocomics.com/meaningoflila"),
|
||||
#(u"The Middletons", u"http://www.gocomics.com/themiddletons"),
|
||||
#(u"The Norm", u"http://www.gocomics.com/thenorm"),
|
||||
#(u"The Other Coast", u"http://www.gocomics.com/theothercoast"),
|
||||
#(u"The Quigmans", u"http://www.gocomics.com/thequigmans"),
|
||||
#(u"The Sunshine Club", u"http://www.gocomics.com/thesunshineclub"),
|
||||
#(u"Tiny Sepuk", u"http://www.gocomics.com/tinysepuk"),
|
||||
#(u"TOBY", u"http://www.gocomics.com/toby"),
|
||||
#(u"Tom the Dancing Bug", u"http://www.gocomics.com/tomthedancingbug"),
|
||||
#(u"Too Much Coffee Man", u"http://www.gocomics.com/toomuchcoffeeman"),
|
||||
#(u"Unstrange Phenomena", u"http://www.gocomics.com/unstrangephenomena"),
|
||||
#(u"W.T. Duck", u"http://www.gocomics.com/wtduck"),
|
||||
#(u"Watch Your Head", u"http://www.gocomics.com/watchyourhead"),
|
||||
#(u"Wee Pals", u"http://www.gocomics.com/weepals"),
|
||||
#(u"Winnie the Pooh", u"http://www.gocomics.com/winniethepooh"),
|
||||
#(u"Wizard of Id", u"http://www.gocomics.com/wizardofid"),
|
||||
#(u"Working Daze", u"http://www.gocomics.com/workingdaze"),
|
||||
#(u"Working It Out", u"http://www.gocomics.com/workingitout"),
|
||||
#(u"Yenny", u"http://www.gocomics.com/yenny"),
|
||||
#(u"Zack Hill", u"http://www.gocomics.com/zackhill"),
|
||||
(u"Ziggy", u"http://www.gocomics.com/ziggy"),
|
||||
#
|
||||
######## EDITORIAL CARTOONS #####################
|
||||
(u"Adam Zyglis", u"http://www.gocomics.com/adamzyglis"),
|
||||
#(u"Andy Singer", u"http://www.gocomics.com/andysinger"),
|
||||
#(u"Ben Sargent",u"http://www.gocomics.com/bensargent"),
|
||||
#(u"Bill Day", u"http://www.gocomics.com/billday"),
|
||||
#(u"Bill Schorr", u"http://www.gocomics.com/billschorr"),
|
||||
#(u"Bob Englehart", u"http://www.gocomics.com/bobenglehart"),
|
||||
(u"Bob Gorrell",u"http://www.gocomics.com/bobgorrell"),
|
||||
#(u"Brian Fairrington", u"http://www.gocomics.com/brianfairrington"),
|
||||
#(u"Bruce Beattie", u"http://www.gocomics.com/brucebeattie"),
|
||||
#(u"Cam Cardow", u"http://www.gocomics.com/camcardow"),
|
||||
#(u"Chan Lowe",u"http://www.gocomics.com/chanlowe"),
|
||||
#(u"Chip Bok",u"http://www.gocomics.com/chipbok"),
|
||||
#(u"Chris Britt",u"http://www.gocomics.com/chrisbritt"),
|
||||
#(u"Chuck Asay",u"http://www.gocomics.com/chuckasay"),
|
||||
#(u"Clay Bennett",u"http://www.gocomics.com/claybennett"),
|
||||
#(u"Clay Jones",u"http://www.gocomics.com/clayjones"),
|
||||
#(u"Dan Wasserman",u"http://www.gocomics.com/danwasserman"),
|
||||
#(u"Dana Summers",u"http://www.gocomics.com/danasummers"),
|
||||
#(u"Daryl Cagle", u"http://www.gocomics.com/darylcagle"),
|
||||
#(u"David Fitzsimmons", u"http://www.gocomics.com/davidfitzsimmons"),
|
||||
(u"Dick Locher",u"http://www.gocomics.com/dicklocher"),
|
||||
#(u"Don Wright",u"http://www.gocomics.com/donwright"),
|
||||
#(u"Donna Barstow",u"http://www.gocomics.com/donnabarstow"),
|
||||
#(u"Drew Litton", u"http://www.gocomics.com/drewlitton"),
|
||||
#(u"Drew Sheneman",u"http://www.gocomics.com/drewsheneman"),
|
||||
#(u"Ed Stein", u"http://www.gocomics.com/edstein"),
|
||||
#(u"Eric Allie", u"http://www.gocomics.com/ericallie"),
|
||||
#(u"Gary Markstein", u"http://www.gocomics.com/garymarkstein"),
|
||||
#(u"Gary McCoy", u"http://www.gocomics.com/garymccoy"),
|
||||
#(u"Gary Varvel", u"http://www.gocomics.com/garyvarvel"),
|
||||
#(u"Glenn McCoy",u"http://www.gocomics.com/glennmccoy"),
|
||||
#(u"Henry Payne", u"http://www.gocomics.com/henrypayne"),
|
||||
#(u"Jack Ohman",u"http://www.gocomics.com/jackohman"),
|
||||
#(u"JD Crowe", u"http://www.gocomics.com/jdcrowe"),
|
||||
#(u"Jeff Danziger",u"http://www.gocomics.com/jeffdanziger"),
|
||||
#(u"Jeff Parker", u"http://www.gocomics.com/jeffparker"),
|
||||
#(u"Jeff Stahler", u"http://www.gocomics.com/jeffstahler"),
|
||||
#(u"Jerry Holbert", u"http://www.gocomics.com/jerryholbert"),
|
||||
#(u"Jim Morin",u"http://www.gocomics.com/jimmorin"),
|
||||
#(u"Joel Pett",u"http://www.gocomics.com/joelpett"),
|
||||
#(u"John Cole", u"http://www.gocomics.com/johncole"),
|
||||
#(u"John Darkow", u"http://www.gocomics.com/johndarkow"),
|
||||
#(u"John Deering",u"http://www.gocomics.com/johndeering"),
|
||||
#(u"John Sherffius", u"http://www.gocomics.com/johnsherffius"),
|
||||
#(u"Ken Catalino",u"http://www.gocomics.com/kencatalino"),
|
||||
#(u"Kerry Waghorn",u"http://www.gocomics.com/facesinthenews"),
|
||||
#(u"Kevin Kallaugher",u"http://www.gocomics.com/kevinkallaugher"),
|
||||
#(u"Lalo Alcaraz",u"http://www.gocomics.com/laloalcaraz"),
|
||||
#(u"Larry Wright", u"http://www.gocomics.com/larrywright"),
|
||||
#(u"Lisa Benson", u"http://www.gocomics.com/lisabenson"),
|
||||
#(u"Marshall Ramsey", u"http://www.gocomics.com/marshallramsey"),
|
||||
#(u"Matt Bors", u"http://www.gocomics.com/mattbors"),
|
||||
#(u"Matt Davies",u"http://www.gocomics.com/mattdavies"),
|
||||
#(u"Michael Ramirez", u"http://www.gocomics.com/michaelramirez"),
|
||||
#(u"Mike Keefe", u"http://www.gocomics.com/mikekeefe"),
|
||||
#(u"Mike Luckovich", u"http://www.gocomics.com/mikeluckovich"),
|
||||
#(u"MIke Thompson", u"http://www.gocomics.com/mikethompson"),
|
||||
#(u"Monte Wolverton", u"http://www.gocomics.com/montewolverton"),
|
||||
#(u"Mr. Fish", u"http://www.gocomics.com/mrfish"),
|
||||
#(u"Nate Beeler", u"http://www.gocomics.com/natebeeler"),
|
||||
#(u"Nick Anderson", u"http://www.gocomics.com/nickanderson"),
|
||||
#(u"Pat Bagley", u"http://www.gocomics.com/patbagley"),
|
||||
#(u"Pat Oliphant",u"http://www.gocomics.com/patoliphant"),
|
||||
#(u"Paul Conrad",u"http://www.gocomics.com/paulconrad"),
|
||||
#(u"Paul Szep", u"http://www.gocomics.com/paulszep"),
|
||||
#(u"RJ Matson", u"http://www.gocomics.com/rjmatson"),
|
||||
#(u"Rob Rogers", u"http://www.gocomics.com/robrogers"),
|
||||
#(u"Robert Ariail", u"http://www.gocomics.com/robertariail"),
|
||||
#(u"Scott Stantis", u"http://www.gocomics.com/scottstantis"),
|
||||
#(u"Signe Wilkinson", u"http://www.gocomics.com/signewilkinson"),
|
||||
#(u"Small World",u"http://www.gocomics.com/smallworld"),
|
||||
#(u"Steve Benson", u"http://www.gocomics.com/stevebenson"),
|
||||
#(u"Steve Breen", u"http://www.gocomics.com/stevebreen"),
|
||||
#(u"Steve Kelley", u"http://www.gocomics.com/stevekelley"),
|
||||
#(u"Steve Sack", u"http://www.gocomics.com/stevesack"),
|
||||
#(u"Stuart Carlson",u"http://www.gocomics.com/stuartcarlson"),
|
||||
#(u"Ted Rall",u"http://www.gocomics.com/tedrall"),
|
||||
#(u"(Th)ink", u"http://www.gocomics.com/think"),
|
||||
#(u"Tom Toles",u"http://www.gocomics.com/tomtoles"),
|
||||
(u"Tony Auth",u"http://www.gocomics.com/tonyauth"),
|
||||
#(u"Views of the World",u"http://www.gocomics.com/viewsoftheworld"),
|
||||
#(u"ViewsAfrica",u"http://www.gocomics.com/viewsafrica"),
|
||||
#(u"ViewsAmerica",u"http://www.gocomics.com/viewsamerica"),
|
||||
#(u"ViewsAsia",u"http://www.gocomics.com/viewsasia"),
|
||||
#(u"ViewsBusiness",u"http://www.gocomics.com/viewsbusiness"),
|
||||
#(u"ViewsEurope",u"http://www.gocomics.com/viewseurope"),
|
||||
#(u"ViewsLatinAmerica",u"http://www.gocomics.com/viewslatinamerica"),
|
||||
#(u"ViewsMidEast",u"http://www.gocomics.com/viewsmideast"),
|
||||
(u"Walt Handelsman",u"http://www.gocomics.com/walthandelsman"),
|
||||
#(u"Wayne Stayskal",u"http://www.gocomics.com/waynestayskal"),
|
||||
#(u"Wit of the World",u"http://www.gocomics.com/witoftheworld"),
|
||||
]:
|
||||
print 'Working on: ', title
|
||||
("9 Chickweed Lane", "http://gocomics.com/9_chickweed_lane"),
|
||||
("Agnes", "http://gocomics.com/agnes"),
|
||||
("Alley Oop", "http://gocomics.com/alley_oop"),
|
||||
("Andy Capp", "http://gocomics.com/andy_capp"),
|
||||
("Arlo & Janis", "http://gocomics.com/arlo&janis"),
|
||||
("B.C.", "http://gocomics.com/bc"),
|
||||
("Ballard Street", "http://gocomics.com/ballard_street"),
|
||||
# ("Ben", "http://comics.com/ben"),
|
||||
# ("Betty", "http://comics.com/betty"),
|
||||
# ("Big Nate", "http://comics.com/big_nate"),
|
||||
# ("Brevity", "http://comics.com/brevity"),
|
||||
# ("Candorville", "http://comics.com/candorville"),
|
||||
# ("Cheap Thrills", "http://comics.com/cheap_thrills"),
|
||||
# ("Committed", "http://comics.com/committed"),
|
||||
# ("Cow & Boy", "http://comics.com/cow&boy"),
|
||||
# ("Daddy's Home", "http://comics.com/daddys_home"),
|
||||
# ("Dog eat Doug", "http://comics.com/dog_eat_doug"),
|
||||
# ("Drabble", "http://comics.com/drabble"),
|
||||
# ("F Minus", "http://comics.com/f_minus"),
|
||||
# ("Family Tree", "http://comics.com/family_tree"),
|
||||
# ("Farcus", "http://comics.com/farcus"),
|
||||
# ("Fat Cats Classics", "http://comics.com/fat_cats_classics"),
|
||||
# ("Ferd'nand", "http://comics.com/ferdnand"),
|
||||
# ("Flight Deck", "http://comics.com/flight_deck"),
|
||||
# ("Flo & Friends", "http://comics.com/flo&friends"),
|
||||
# ("Fort Knox", "http://comics.com/fort_knox"),
|
||||
# ("Frank & Ernest", "http://comics.com/frank&ernest"),
|
||||
# ("Frazz", "http://comics.com/frazz"),
|
||||
# ("Free Range", "http://comics.com/free_range"),
|
||||
# ("Geech Classics", "http://comics.com/geech_classics"),
|
||||
# ("Get Fuzzy", "http://comics.com/get_fuzzy"),
|
||||
# ("Girls & Sports", "http://comics.com/girls&sports"),
|
||||
# ("Graffiti", "http://comics.com/graffiti"),
|
||||
# ("Grand Avenue", "http://comics.com/grand_avenue"),
|
||||
# ("Heathcliff", "http://comics.com/heathcliff"),
|
||||
# "Heathcliff, a street-smart and mischievous cat with many adventures."
|
||||
# ("Herb and Jamaal", "http://comics.com/herb_and_jamaal"),
|
||||
# ("Herman", "http://comics.com/herman"),
|
||||
# ("Home and Away", "http://comics.com/home_and_away"),
|
||||
# ("It's All About You", "http://comics.com/its_all_about_you"),
|
||||
# ("Jane's World", "http://comics.com/janes_world"),
|
||||
# ("Jump Start", "http://comics.com/jump_start"),
|
||||
# ("Kit 'N' Carlyle", "http://comics.com/kit_n_carlyle"),
|
||||
# ("Li'l Abner Classics", "http://comics.com/lil_abner_classics"),
|
||||
# ("Liberty Meadows", "http://comics.com/liberty_meadows"),
|
||||
# ("Little Dog Lost", "http://comics.com/little_dog_lost"),
|
||||
# ("Lola", "http://comics.com/lola"),
|
||||
# ("Luann", "http://comics.com/luann"),
|
||||
# ("Marmaduke", "http://comics.com/marmaduke"),
|
||||
# ("Meg! Classics", "http://comics.com/meg_classics"),
|
||||
# ("Minimum Security", "http://comics.com/minimum_security"),
|
||||
# ("Moderately Confused", "http://comics.com/moderately_confused"),
|
||||
# ("Momma", "http://comics.com/momma"),
|
||||
# ("Monty", "http://comics.com/monty"),
|
||||
# ("Motley Classics", "http://comics.com/motley_classics"),
|
||||
# ("Nancy", "http://comics.com/nancy"),
|
||||
# ("Natural Selection", "http://comics.com/natural_selection"),
|
||||
# ("Nest Heads", "http://comics.com/nest_heads"),
|
||||
# ("Off The Mark", "http://comics.com/off_the_mark"),
|
||||
# ("On a Claire Day", "http://comics.com/on_a_claire_day"),
|
||||
# ("One Big Happy Classics", "http://comics.com/one_big_happy_classics"),
|
||||
# ("Over the Hedge", "http://comics.com/over_the_hedge"),
|
||||
# ("PC and Pixel", "http://comics.com/pc_and_pixel"),
|
||||
# ("Peanuts", "http://comics.com/peanuts"),
|
||||
# ("Pearls Before Swine", "http://comics.com/pearls_before_swine"),
|
||||
# ("Pickles", "http://comics.com/pickles"),
|
||||
# ("Prickly City", "http://comics.com/prickly_city"),
|
||||
# ("Raising Duncan Classics", "http://comics.com/raising_duncan_classics"),
|
||||
# ("Reality Check", "http://comics.com/reality_check"),
|
||||
# ("Red & Rover", "http://comics.com/red&rover"),
|
||||
# ("Rip Haywire", "http://comics.com/rip_haywire"),
|
||||
# ("Ripley's Believe It or Not!", "http://comics.com/ripleys_believe_it_or_not"),
|
||||
# ("Rose Is Rose", "http://comics.com/rose_is_rose"),
|
||||
# ("Rubes", "http://comics.com/rubes"),
|
||||
# ("Rudy Park", "http://comics.com/rudy_park"),
|
||||
# ("Scary Gary", "http://comics.com/scary_gary"),
|
||||
# ("Shirley and Son Classics", "http://comics.com/shirley_and_son_classics"),
|
||||
# ("Soup To Nutz", "http://comics.com/soup_to_nutz"),
|
||||
# ("Speed Bump", "http://comics.com/speed_bump"),
|
||||
# ("Spot The Frog", "http://comics.com/spot_the_frog"),
|
||||
# ("State of the Union", "http://comics.com/state_of_the_union"),
|
||||
# ("Strange Brew", "http://comics.com/strange_brew"),
|
||||
# ("Tarzan Classics", "http://comics.com/tarzan_classics"),
|
||||
# ("That's Life", "http://comics.com/thats_life"),
|
||||
# ("The Barn", "http://comics.com/the_barn"),
|
||||
# ("The Born Loser", "http://comics.com/the_born_loser"),
|
||||
# ("The Buckets", "http://comics.com/the_buckets"),
|
||||
# ("The Dinette Set", "http://comics.com/the_dinette_set"),
|
||||
# ("The Grizzwells", "http://comics.com/the_grizzwells"),
|
||||
# ("The Humble Stumble", "http://comics.com/the_humble_stumble"),
|
||||
# ("The Knight Life", "http://comics.com/the_knight_life"),
|
||||
# ("The Meaning of Lila", "http://comics.com/the_meaning_of_lila"),
|
||||
# ("The Other Coast", "http://comics.com/the_other_coast"),
|
||||
# ("The Sunshine Club", "http://comics.com/the_sunshine_club"),
|
||||
# ("Unstrange Phenomena", "http://comics.com/unstrange_phenomena"),
|
||||
# ("Watch Your Head", "http://comics.com/watch_your_head"),
|
||||
# ("Wizard of Id", "http://comics.com/wizard_of_id"),
|
||||
# ("Working Daze", "http://comics.com/working_daze"),
|
||||
# ("Working It Out", "http://comics.com/working_it_out"),
|
||||
# ("Zack Hill", "http://comics.com/zack_hill"),
|
||||
# ("(Th)ink", "http://comics.com/think"),
|
||||
# "Tackling the political and social issues impacting communities of color."
|
||||
# ("Adam Zyglis", "http://comics.com/adam_zyglis"),
|
||||
# "Known for his excellent caricatures, as well as independent and incisive imagery. "
|
||||
# ("Andy Singer", "http://comics.com/andy_singer"),
|
||||
# ("Bill Day", "http://comics.com/bill_day"),
|
||||
# "Powerful images on sensitive issues."
|
||||
# ("Bill Schorr", "http://comics.com/bill_schorr"),
|
||||
# ("Bob Englehart", "http://comics.com/bob_englehart"),
|
||||
# ("Brian Fairrington", "http://comics.com/brian_fairrington"),
|
||||
# ("Bruce Beattie", "http://comics.com/bruce_beattie"),
|
||||
# ("Cam Cardow", "http://comics.com/cam_cardow"),
|
||||
# ("Chip Bok", "http://comics.com/chip_bok"),
|
||||
# ("Chris Britt", "http://comics.com/chris_britt"),
|
||||
# ("Chuck Asay", "http://comics.com/chuck_asay"),
|
||||
# ("Clay Bennett", "http://comics.com/clay_bennett"),
|
||||
# ("Daryl Cagle", "http://comics.com/daryl_cagle"),
|
||||
# ("David Fitzsimmons", "http://comics.com/david_fitzsimmons"),
|
||||
# "David Fitzsimmons is a new editorial cartoons on comics.com. He is also a staff writer and editorial cartoonist for the Arizona Daily Star. "
|
||||
# ("Drew Litton", "http://comics.com/drew_litton"),
|
||||
# "Drew Litton is an artist who is probably best known for his sports cartoons. He received the National Cartoonist Society Sports Cartoon Award for 1993. "
|
||||
# ("Ed Stein", "http://comics.com/ed_stein"),
|
||||
# "Winner of the Fischetti Award in 2006 and the Scripps Howard National Journalism Award, 1999, Ed Stein has been the editorial cartoonist for the Rocky Mountain News since 1978. "
|
||||
# ("Eric Allie", "http://comics.com/eric_allie"),
|
||||
# "Eric Allie is an editorial cartoonist with the Pioneer Press and CNS News. "
|
||||
# ("Gary Markstein", "http://comics.com/gary_markstein"),
|
||||
# ("Gary McCoy", "http://comics.com/gary_mccoy"),
|
||||
# "Gary McCoy is known for his editorial cartoons, humor and inane ramblings. He is a 2 time nominee for Best Magazine Cartoonist of the Year by the National Cartoonists Society. He resides in Belleville, IL. "
|
||||
# ("Gary Varvel", "http://comics.com/gary_varvel"),
|
||||
# ("Henry Payne", "http://comics.com/henry_payne"),
|
||||
# ("JD Crowe", "http://comics.com/jd_crowe"),
|
||||
# ("Jeff Parker", "http://comics.com/jeff_parker"),
|
||||
# ("Jeff Stahler", "http://comics.com/jeff_stahler"),
|
||||
# ("Jerry Holbert", "http://comics.com/jerry_holbert"),
|
||||
# ("John Cole", "http://comics.com/john_cole"),
|
||||
# ("John Darkow", "http://comics.com/john_darkow"),
|
||||
# "John Darkow is a contributing editorial cartoonist for the Humor Times as well as editoiral cartoonist for the Columbia Daily Tribune, Missouri"
|
||||
# ("John Sherffius", "http://comics.com/john_sherffius"),
|
||||
# ("Larry Wright", "http://comics.com/larry_wright"),
|
||||
# ("Lisa Benson", "http://comics.com/lisa_benson"),
|
||||
# ("Marshall Ramsey", "http://comics.com/marshall_ramsey"),
|
||||
# ("Matt Bors", "http://comics.com/matt_bors"),
|
||||
# ("Michael Ramirez", "http://comics.com/michael_ramirez"),
|
||||
# ("Mike Keefe", "http://comics.com/mike_keefe"),
|
||||
# ("Mike Luckovich", "http://comics.com/mike_luckovich"),
|
||||
# ("MIke Thompson", "http://comics.com/mike_thompson"),
|
||||
# ("Monte Wolverton", "http://comics.com/monte_wolverton"),
|
||||
# "Unique mix of perspectives"
|
||||
# ("Mr. Fish", "http://comics.com/mr_fish"),
|
||||
# "Side effects may include swelling"
|
||||
# ("Nate Beeler", "http://comics.com/nate_beeler"),
|
||||
# "Middle America meets the Beltway."
|
||||
# ("Nick Anderson", "http://comics.com/nick_anderson"),
|
||||
# ("Pat Bagley", "http://comics.com/pat_bagley"),
|
||||
# "Unfair and Totally Unbalanced."
|
||||
# ("Paul Szep", "http://comics.com/paul_szep"),
|
||||
# ("RJ Matson", "http://comics.com/rj_matson"),
|
||||
# "Power cartoons from NYC and Capitol Hill"
|
||||
# ("Rob Rogers", "http://comics.com/rob_rogers"),
|
||||
# "Humorous slant on current events"
|
||||
# ("Robert Ariail", "http://comics.com/robert_ariail"),
|
||||
# "Clever and unpredictable"
|
||||
# ("Scott Stantis", "http://comics.com/scott_stantis"),
|
||||
# ("Signe Wilkinson", "http://comics.com/signe_wilkinson"),
|
||||
# ("Steve Benson", "http://comics.com/steve_benson"),
|
||||
# ("Steve Breen", "http://comics.com/steve_breen"),
|
||||
# ("Steve Kelley", "http://comics.com/steve_kelley"),
|
||||
# ("Steve Sack", "http://comics.com/steve_sack"),
|
||||
]:
|
||||
            articles = self.make_links(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def make_links(self, url):
        title = 'Temp'
        soup = self.index_to_soup(url)
        # print 'soup: ', soup
        title = ''
        current_articles = []
        pages = range(1, self.num_comics_to_get+1)
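        # For each strip page: pull the strip title and date from the page header,
        # then locate the 'prev' link so the next iteration can walk one strip back.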
        for page in pages:
            page_soup = self.index_to_soup(url)
            if page_soup:
                try:
                    strip_title = page_soup.find(name='div', attrs={'class':'top'}).h1.a.string
                except:
                    strip_title = 'Error - no Title found'
                try:
                    date_title = page_soup.find('ul', attrs={'class': 'feature-nav'}).li.string
                    if not date_title:
                        date_title = page_soup.find('ul', attrs={'class': 'feature-nav'}).li.string
                except:
                    date_title = 'Error - no Date found'
                title = strip_title + ' - ' + date_title
                for i in range(2):
                    try:
                        strip_url_date = page_soup.find(name='div', attrs={'class':'top'}).h1.a['href']
                        break #success - this is normal exit
                    except:
                        strip_url_date = None
                        continue #try to get strip_url_date again
                for i in range(2):
                    try:
                        prev_strip_url_date = page_soup.find('a', attrs={'class': 'prev'})['href']
                        break #success - this is normal exit
                    except:
                        prev_strip_url_date = None
                        continue #try to get prev_strip_url_date again
                if strip_url_date:
                    page_url = 'http://www.gocomics.com' + strip_url_date
                else:
                    continue
                if prev_strip_url_date:
                    prev_page_url = 'http://www.gocomics.com' + prev_strip_url_date
                else:
                    continue
        from datetime import datetime, timedelta
        now = datetime.now()
        dates = [(now-timedelta(days=d)).strftime('%Y/%m/%d') for d in range(self.num_comics_to_get)]
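
        # Date-based variant: build one URL per calendar day (newest first) and
        # collect an article entry for every dated page that yields a strip.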
        for page in dates:
            page_url = url + '/' + str(page)
            print(page_url)
            soup = self.index_to_soup(page_url)
            if soup:
                strip_tag = self.tag_to_string(soup.find('a'))
                if strip_tag:
                    print 'strip_tag: ', strip_tag
                    title = strip_tag
                    print 'title: ', title
                    current_articles.append({'title': title, 'url': page_url, 'description':'', 'date':''})
            url = prev_page_url
        current_articles.reverse()
        return current_articles

    def preprocess_html(self, soup):
        if soup.title:
            title_string = soup.title.string.strip()
            _cd = title_string.split(',',1)[1]
            comic_date = ' '.join(_cd.split(' ', 4)[0:-1])
            if soup.h1.span:
                artist = soup.h1.span.string
                soup.h1.span.string.replaceWith(comic_date + artist)
        feature_item = soup.find('p',attrs={'class':'feature_item'})
        if feature_item.a:
            a_tag = feature_item.a
            a_href = a_tag["href"]
            img_tag = a_tag.img
            img_tag["src"] = a_href
            img_tag["width"] = self.comic_size
            img_tag["height"] = None
        return self.adeify_images(soup)

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        img {max-width:100%; min-width:100%;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
        '''

'''
|
||||
|
@ -1,6 +1,4 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
from datetime import date, timedelta
|
||||
|
||||
class HBR(BasicNewsRecipe):
|
||||
|
||||
@ -11,23 +9,18 @@ class HBR(BasicNewsRecipe):
|
||||
timefmt = ' [%B %Y]'
|
||||
language = 'en'
|
||||
no_stylesheets = True
|
||||
# recipe_disabled = ('hbr.org has started requiring the use of javascript'
|
||||
# ' to log into their website. This is unsupported in calibre, so'
|
||||
# ' this recipe has been disabled. If you would like to see '
|
||||
# ' HBR supported in calibre, contact hbr.org and ask them'
|
||||
# ' to provide a javascript free login method.')
|
||||
|
||||
LOGIN_URL = 'https://hbr.org/login?request_url=/'
|
||||
LOGOUT_URL = 'https://hbr.org/logout?request_url=/'
|
||||
|
||||
INDEX = 'http://hbr.org/archive-toc/BR'
|
||||
INDEX = 'http://hbr.org'
|
||||
|
||||
keep_only_tags = [dict(name='div', id='pageContainer')]
|
||||
remove_tags = [dict(id=['mastheadContainer', 'magazineHeadline',
|
||||
'articleToolbarTopRD', 'pageRightSubColumn', 'pageRightColumn',
|
||||
'todayOnHBRListWidget', 'mostWidget', 'keepUpWithHBR',
|
||||
'mailingListTout', 'partnerCenter', 'pageFooter',
|
||||
'superNavHeadContainer', 'hbrDisqus',
|
||||
'superNavHeadContainer', 'hbrDisqus', 'article-toolbox',
|
||||
'articleToolbarTop', 'articleToolbarBottom', 'articleToolbarRD']),
|
||||
dict(name='iframe')]
|
||||
extra_css = '''
|
||||
@ -57,22 +50,6 @@ class HBR(BasicNewsRecipe):
|
||||
if url.endswith('/ar/1'):
|
||||
return url[:-1]+'pr'
|
||||
|
||||
def hbr_get_toc(self):
|
||||
# return self.index_to_soup(open('/t/toc.html').read())
|
||||
|
||||
today = date.today()
|
||||
future = today + timedelta(days=30)
|
||||
past = today - timedelta(days=30)
|
||||
for x in [x.strftime('%y%m') for x in (future, today, past)]:
|
||||
url = self.INDEX + x
|
||||
soup = self.index_to_soup(url)
|
||||
if (not soup.find(text='Issue Not Found') and not soup.find(
|
||||
text="We're Sorry. There was an error processing your request")
|
||||
and 'Exception: java.io.FileNotFoundException' not in
|
||||
unicode(soup)):
|
||||
return soup
|
||||
raise Exception('Could not find current issue')
|
||||
|
||||
def hbr_parse_toc(self, soup):
|
||||
feeds = []
|
||||
current_section = None
|
||||
@ -105,23 +82,19 @@ class HBR(BasicNewsRecipe):
|
||||
|
||||
articles.append({'title':title, 'url':url, 'description':desc,
|
||||
'date':''})
|
||||
|
||||
if current_section is not None and articles:
|
||||
feeds.append((current_section, articles))
|
||||
return feeds
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.hbr_get_toc()
|
||||
# open('/t/hbr.html', 'wb').write(unicode(soup).encode('utf-8'))
|
||||
soup0 = self.index_to_soup('http://hbr.org/magazine')
|
||||
datencover = soup0.find('ul', attrs={'id':'magazineArchiveCarousel'}).findAll('li')[-1]
|
||||
#find date & cover
|
||||
self.cover_url=datencover.img['src']
|
||||
dates=self.tag_to_string(datencover.img['alt'])
|
||||
self.timefmt = u' [%s]'%dates
|
||||
soup = self.index_to_soup(self.INDEX + soup0.find('div', attrs = {'class':'magazine_page'}).a['href'])
|
||||
feeds = self.hbr_parse_toc(soup)
|
||||
return feeds
|
||||
|
||||
def get_cover_url(self):
|
||||
cover_url = None
|
||||
index = 'http://hbr.org/current'
|
||||
soup = self.index_to_soup(index)
|
||||
link_item = soup.find('img', alt=re.compile("Current Issue"), src=True)
|
||||
|
||||
if link_item:
|
||||
cover_url = 'http://hbr.org' + link_item['src']
|
||||
|
||||
return cover_url
|
||||
|
||||
|
||||
|
BIN
recipes/icons/forbes_pl.png
Normal file
After Width: | Height: | Size: 1.2 KiB |
BIN
recipes/icons/newsweek_polska.png
Normal file
After Width: | Height: | Size: 905 B |
BIN
recipes/icons/slashdot.png
Normal file
After Width: | Height: | Size: 250 B |
BIN
recipes/icons/sportowefakty.png
Normal file
After Width: | Height: | Size: 511 B |
BIN
recipes/icons/wysokie_obcasy.png
Normal file
After Width: | Height: | Size: 205 B |
@ -41,6 +41,7 @@ class TheIndependentNew(BasicNewsRecipe):
|
||||
publication_type = 'newspaper'
|
||||
masthead_url = 'http://www.independent.co.uk/independent.co.uk/editorial/logo/independent_Masthead.png'
|
||||
encoding = 'utf-8'
|
||||
compress_news_images = True
|
||||
remove_tags =[
|
||||
dict(attrs={'id' : ['RelatedArtTag','renderBiography']}),
|
||||
dict(attrs={'class' : ['autoplay','openBiogPopup']}),
|
||||
@ -343,7 +344,7 @@ class TheIndependentNew(BasicNewsRecipe):
|
||||
if 'class' in subtitle_div:
|
||||
clazz = subtitle_div['class'] + ' '
|
||||
clazz = clazz + 'subtitle'
|
||||
subtitle_div['class'] = clazz
|
||||
subtitle_div['class'] = clazz
|
||||
|
||||
#find broken images and remove captions
|
||||
items_to_extract = []
|
||||
|
11
recipes/lightspeed_magazine.recipe
Normal file
@ -0,0 +1,11 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe

class AdvancedUserRecipe1366025923(BasicNewsRecipe):
    title = u'Lightspeed Magazine'
    language = 'en'
    __author__ = 'Jose Pinto'
    oldest_article = 31
    max_articles_per_feed = 100
    auto_cleanup = True
    use_embedded_content = False
    feeds = [(u'Latest Stories', u'http://www.lightspeedmagazine.com/rss-2/')]

@ -36,6 +36,9 @@ from BeautifulSoup import BeautifulSoup
|
||||
Changed order of regex to speed up processing
|
||||
Version 1.9.3 23-05-2012
|
||||
Updated Cover image
|
||||
Version 1.9.4 19-04-2013
|
||||
Added regex filter for mailto
|
||||
Updated for new layout of metro-site
|
||||
'''
|
||||
|
||||
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
@ -43,7 +46,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
oldest_article = 1.2
|
||||
max_articles_per_feed = 25
|
||||
__author__ = u'DrMerry'
|
||||
description = u'Metro Nederland'
|
||||
description = u'Metro Nederland v1.9.4 2013-04-19'
|
||||
language = u'nl'
|
||||
simultaneous_downloads = 5
|
||||
masthead_url = 'http://blog.metronieuws.nl/wp-content/themes/metro/images/header.gif'
|
||||
@ -68,13 +71,17 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
#(re.compile('(</?)h2', re.DOTALL|re.IGNORECASE),lambda match:'\1em')
|
||||
]
|
||||
|
||||
remove_tags_before= dict(id='date')
|
||||
remove_tags_after = [dict(name='div', attrs={'class':['column-1-3','gallery-text']})]#id='share-and-byline')]
|
||||
remove_tags_before= dict(id='subwrapper')
|
||||
remove_tags_after = dict(name='div', attrs={'class':['body-area','article-main-area']})
|
||||
#name='div', attrs={'class':['subwrapper']})]
|
||||
#'column-1-3','gallery-text']})]#id='share-and-byline')]
|
||||
|
||||
filter_regexps = [r'mailto:.*']
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['iframe','script','noscript','style']),
|
||||
dict(name='div', attrs={'class':['column-4-5','column-1-5','ad-msg','col-179 ','col-373 ','clear','ad','navigation',re.compile('share-tools(-top)?'),'tools','metroCommentFormWrap','article-tools-below-title','related-links','padding-top-15',re.compile('^promo.*?$'),'teaser-component',re.compile('fb(-comments|_iframe_widget)'),'promos','header-links','promo-2']}),
|
||||
dict(id=['column-1-5-bottom','column-4-5',re.compile('^ad(\d+|adcomp.*?)?$'),'adadcomp-4','margin-5','sidebar',re.compile('^article-\d'),'comments','gallery-1']),
|
||||
dict(name='div', attrs={'class':['aside clearfix','aside clearfix middle-col-line','comments','share-tools','article-right-column','column-4-5','column-1-5','ad-msg','col-179 ','col-373 ','clear','ad','navigation',re.compile('share-tools(-top)?'),'tools','metroCommentFormWrap','article-tools-below-title','related-links','padding-top-15',re.compile('^promo.*?$'),'teaser-component',re.compile('fb(-comments|_iframe_widget)'),'promos','header-links','promo-2']}),
|
||||
dict(id=['article-2','googleads','column-1-5-bottom','column-4-5',re.compile('^ad(\d+|adcomp.*?)?$'),'adadcomp-4','margin-5','sidebar',re.compile('^article-\d'),'comments','gallery-1','sharez_container','ts-container','topshares','ts-title']),
|
||||
dict(name='a', attrs={'name':'comments'}),
|
||||
#dict(name='div', attrs={'data-href'}),
|
||||
dict(name='img', attrs={'class':'top-line','title':'volledig scherm'}),
|
||||
|
@ -6,10 +6,10 @@ import time
|
||||
|
||||
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
title = u'Metro UK'
|
||||
description = 'News as provided by The Metro -UK'
|
||||
description = 'News from The Metro, UK'
|
||||
#timefmt = ''
|
||||
__author__ = 'fleclerc & Dave Asbury'
|
||||
#last update 20/1/13
|
||||
__author__ = 'Dave Asbury'
|
||||
#last update 4/4/13
|
||||
#cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/276636_117118184990145_2132092232_n.jpg'
|
||||
|
||||
cover_url = 'https://twimg0-a.akamaihd.net/profile_images/1638332595/METRO_LETTERS-01.jpg'
|
||||
@ -22,7 +22,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
|
||||
language = 'en_GB'
|
||||
masthead_url = 'http://e-edition.metro.co.uk/images/metro_logo.gif'
|
||||
|
||||
compress_news_images = True
|
||||
def parse_index(self):
|
||||
articles = {}
|
||||
key = None
|
||||
|
@ -1,64 +1,44 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008-2013, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
newyorker.com
|
||||
'''
|
||||
|
||||
'''
|
||||
www.canada.com
|
||||
'''
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulStoneSoup
|
||||
|
||||
class NewYorker(BasicNewsRecipe):
|
||||
title = 'The New Yorker'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'The best of US journalism'
|
||||
oldest_article = 15
|
||||
language = 'en'
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
publisher = 'Conde Nast Publications'
|
||||
category = 'news, politics, USA'
|
||||
encoding = 'cp1252'
|
||||
publication_type = 'magazine'
|
||||
masthead_url = 'http://www.newyorker.com/css/i/hed/logo.gif'
|
||||
extra_css = """
|
||||
body {font-family: "Times New Roman",Times,serif}
|
||||
.articleauthor{color: #9F9F9F;
|
||||
font-family: Arial, sans-serif;
|
||||
font-size: small;
|
||||
text-transform: uppercase}
|
||||
.rubric,.dd,h6#credit{color: #CD0021;
|
||||
font-family: Arial, sans-serif;
|
||||
font-size: small;
|
||||
text-transform: uppercase}
|
||||
.descender:first-letter{display: inline; font-size: xx-large; font-weight: bold}
|
||||
.dd,h6#credit{color: gray}
|
||||
.c{display: block}
|
||||
.caption,h2#articleintro{font-style: italic}
|
||||
.caption{font-size: small}
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'pagebody'})]
|
||||
remove_tags = [
|
||||
dict(name=['meta','iframe','base','link','embed','object'])
|
||||
,dict(attrs={'class':['utils','socialUtils','articleRailLinks','icons','social-utils-top','entry-keywords','entry-categories','utilsPrintEmail'] })
|
||||
,dict(attrs={'id':['show-header','show-footer'] })
|
||||
]
|
||||
remove_tags_after = dict(attrs={'class':'entry-content'})
|
||||
remove_attributes = ['lang']
|
||||
feeds = [(u'The New Yorker', u'http://www.newyorker.com/services/mrss/feeds/everything.xml')]
|
||||
title = u'New Yorker Magazine'
|
||||
newyorker_prefix = 'http://m.newyorker.com'
|
||||
description = u'Content from the New Yorker website'
|
||||
fp_tag = 'CAN_TC'
|
||||
|
||||
def print_version(self, url):
|
||||
return url + '?printable=true&currentPage=all'
|
||||
masthead_url = 'http://www.newyorker.com/images/elements/print/newyorker_printlogo.gif'
|
||||
|
||||
def image_url_processor(self, baseurl, url):
|
||||
return url.strip()
|
||||
compress_news_images = True
|
||||
compress_news_images_auto_size = 8
|
||||
scale_news_images_to_device = False
|
||||
scale_news_images = (768, 1024)
|
||||
|
||||
url_list = []
|
||||
language = 'en'
|
||||
__author__ = 'Nick Redding'
|
||||
no_stylesheets = True
|
||||
timefmt = ' [%b %d]'
|
||||
encoding = 'utf-8'
|
||||
extra_css = '''
|
||||
.byline { font-size:xx-small; font-weight: bold;}
|
||||
h3 { margin-bottom: 6px; }
|
||||
.caption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
||||
'''
|
||||
keep_only_tags = [dict(name='div', attrs={'id':re.compile('pagebody')})]
|
||||
|
||||
remove_tags = [{'class':'socialUtils'},{'class':'entry-keywords'}]
|
||||
|
||||
def get_cover_url(self):
|
||||
cover_url = "http://www.newyorker.com/images/covers/1925/1925_02_21_p233.jpg"
|
||||
@ -68,13 +48,233 @@ class NewYorker(BasicNewsRecipe):
|
||||
cover_url = 'http://www.newyorker.com' + cover_item.div.img['src'].strip()
|
||||
return cover_url
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
auth = soup.find(attrs={'id':'articleauthor'})
|
||||
if auth:
|
||||
alink = auth.find('a')
|
||||
if alink and alink.string is not None:
|
||||
txt = alink.string
|
||||
alink.replaceWith(txt)
|
||||
def fixChars(self,string):
|
||||
# Replace lsquo (\x91)
|
||||
fixed = re.sub("\x91","‘",string)
|
||||
# Replace rsquo (\x92)
|
||||
fixed = re.sub("\x92","’",fixed)
|
||||
# Replace ldquo (\x93)
|
||||
fixed = re.sub("\x93","“",fixed)
|
||||
# Replace rdquo (\x94)
|
||||
fixed = re.sub("\x94","”",fixed)
|
||||
# Replace ndash (\x96)
|
||||
fixed = re.sub("\x96","–",fixed)
|
||||
# Replace mdash (\x97)
|
||||
fixed = re.sub("\x97","—",fixed)
|
||||
fixed = re.sub("’","’",fixed)
|
||||
return fixed
|
||||
|
||||
def massageNCXText(self, description):
|
||||
# Kindle TOC descriptions won't render certain characters
|
||||
if description:
|
||||
massaged = unicode(BeautifulStoneSoup(description, convertEntities=BeautifulStoneSoup.HTML_ENTITIES))
|
||||
# Replace '&amp;' with '&'
massaged = re.sub("&amp;","&", massaged)
|
||||
return self.fixChars(massaged)
|
||||
else:
|
||||
return description
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
|
||||
if first:
|
||||
picdiv = soup.find('body').find('img')
|
||||
if picdiv is not None:
|
||||
self.add_toc_thumbnail(article,re.sub(r'links\\link\d+\\','',picdiv['src']))
|
||||
xtitle = article.text_summary.strip()
|
||||
if len(xtitle) == 0:
|
||||
desc = soup.find('meta',attrs={'property':'og:description'})
|
||||
if desc is not None:
|
||||
article.summary = article.text_summary = desc['content']
|
||||
shortparagraph = ""
|
||||
## try:
|
||||
if len(article.text_summary.strip()) == 0:
|
||||
articlebodies = soup.findAll('div',attrs={'class':'entry-content'})
|
||||
if articlebodies:
|
||||
for articlebody in articlebodies:
|
||||
if articlebody:
|
||||
paras = articlebody.findAll('p')
|
||||
for p in paras:
|
||||
refparagraph = self.massageNCXText(self.tag_to_string(p,use_alt=False)).strip()
|
||||
#account for blank paragraphs and short paragraphs by appending them to longer ones
|
||||
if len(refparagraph) > 0:
|
||||
if len(refparagraph) > 70: #approximately one line of text
|
||||
newpara = shortparagraph + refparagraph
|
||||
article.summary = article.text_summary = newpara.strip()
|
||||
return
|
||||
else:
|
||||
shortparagraph = refparagraph + " "
|
||||
if shortparagraph.strip().find(" ") == -1 and not shortparagraph.strip().endswith(":"):
|
||||
shortparagraph = shortparagraph + "- "
|
||||
else:
|
||||
article.summary = article.text_summary = self.massageNCXText(article.text_summary)
|
||||
## except:
|
||||
## self.log("Error creating article descriptions")
|
||||
## return
|
||||
|
||||
|
||||
def strip_anchors(self,soup):
|
||||
paras = soup.findAll(True)
|
||||
for para in paras:
|
||||
aTags = para.findAll('a')
|
||||
for a in aTags:
|
||||
if a.img is None:
|
||||
a.replaceWith(a.renderContents().decode('cp1252','replace'))
|
||||
return soup
|
||||
|
||||
def preprocess_html(self,soup):
|
||||
dateline = soup.find('div','published')
|
||||
byline = soup.find('div','byline')
|
||||
title = soup.find('h1','entry-title')
|
||||
if title is None:
|
||||
return self.strip_anchors(soup)
|
||||
if byline is None:
|
||||
title.append(dateline)
|
||||
return self.strip_anchors(soup)
|
||||
byline.append(dateline)
|
||||
return self.strip_anchors(soup)
|
||||
|
||||
def load_global_nav(self,soup):
|
||||
seclist = []
|
||||
ul = soup.find('ul',attrs={'id':re.compile('global-nav-menu')})
|
||||
if ul is not None:
|
||||
for li in ul.findAll('li'):
|
||||
if li.a is not None:
|
||||
securl = li.a['href']
|
||||
if securl != '/' and securl != '/magazine' and securl.startswith('/'):
|
||||
seclist.append((self.tag_to_string(li.a),self.newyorker_prefix+securl))
|
||||
return seclist
|
||||
|
||||
def exclude_url(self,url):
|
||||
if url in self.url_list:
|
||||
return True
|
||||
if not url.endswith('html'):
|
||||
return True
|
||||
if 'goings-on-about-town-app' in url:
|
||||
return True
|
||||
if 'something-to-be-thankful-for' in url:
|
||||
return True
|
||||
if '/shouts/' in url:
|
||||
return True
|
||||
if 'out-loud' in url:
|
||||
return True
|
||||
if '/rss/' in url:
|
||||
return True
|
||||
if '/video-' in url:
|
||||
return True
|
||||
self.url_list.append(url)
|
||||
return False
|
||||
|
||||
|
||||
def load_index_page(self,soup):
|
||||
article_list = []
|
||||
for div in soup.findAll('div',attrs={'class':re.compile('^rotator')}):
|
||||
h2 = div.h2
|
||||
if h2 is not None:
|
||||
a = h2.a
|
||||
if a is not None:
|
||||
url = a['href']
|
||||
if not self.exclude_url(url):
|
||||
if url.startswith('/'):
|
||||
url = self.newyorker_prefix+url
|
||||
byline = h2.span
|
||||
if byline is not None:
|
||||
author = self.tag_to_string(byline)
|
||||
if author.startswith('by '):
|
||||
author = author.replace('by ','')
|
||||
byline.extract()
|
||||
else:
|
||||
author = ''
|
||||
if h2.br is not None:
|
||||
h2.br.replaceWith(' ')
|
||||
title = self.tag_to_string(h2)
|
||||
desc = div.find(attrs={'class':['rotator-ad-body','feature-blurb-text']})
|
||||
if desc is not None:
|
||||
description = self.tag_to_string(desc)
|
||||
else:
|
||||
description = ''
|
||||
article_list.append(dict(title=title,url=url,date='',description=description,author=author,content=''))
|
||||
ul = div.find('ul','feature-blurb-links')
|
||||
if ul is not None:
|
||||
for li in ul.findAll('li'):
|
||||
a = li.a
|
||||
if a is not None:
|
||||
url = a['href']
|
||||
if not self.exclude_url(url):
|
||||
if url.startswith('/'):
|
||||
url = self.newyorker_prefix+url
|
||||
if a.br is not None:
|
||||
a.br.replaceWith(' ')
|
||||
title = '>>'+self.tag_to_string(a)
|
||||
article_list.append(dict(title=title,url=url,date='',description='',author='',content=''))
|
||||
for h3 in soup.findAll('h3','header'):
|
||||
a = h3.a
|
||||
if a is not None:
|
||||
url = a['href']
|
||||
if not self.exclude_url(url):
|
||||
if url.startswith('/'):
|
||||
url = self.newyorker_prefix+url
|
||||
byline = h3.span
|
||||
if byline is not None:
|
||||
author = self.tag_to_string(byline)
|
||||
if author.startswith('by '):
|
||||
author = author.replace('by ','')
|
||||
byline.extract()
|
||||
else:
|
||||
author = ''
|
||||
if h3.br is not None:
|
||||
h3.br.replaceWith(' ')
|
||||
title = self.tag_to_string(h3).strip()
|
||||
article_list.append(dict(title=title,url=url,date='',description='',author=author,content=''))
|
||||
return article_list
|
||||
|
||||
def load_global_section(self,securl):
|
||||
article_list = []
|
||||
try:
|
||||
soup = self.index_to_soup(securl)
|
||||
except:
|
||||
return article_list
|
||||
if '/blogs/' not in securl:
|
||||
return self.load_index_page(soup)
|
||||
for div in soup.findAll('div',attrs={'id':re.compile('^entry')}):
|
||||
h3 = div.h3
|
||||
if h3 is not None:
|
||||
a = h3.a
|
||||
if a is not None:
|
||||
url = a['href']
|
||||
if not self.exclude_url(url):
|
||||
if url.startswith('/'):
|
||||
url = self.newyorker_prefix+url
|
||||
if h3.br is not None:
|
||||
h3.br.replaceWith(' ')
|
||||
title = self.tag_to_string(h3)
|
||||
article_list.append(dict(title=title,url=url,date='',description='',author='',content=''))
|
||||
return article_list
|
||||
|
||||
def filter_ans(self, ans) :
|
||||
total_article_count = 0
|
||||
idx = 0
|
||||
idx_max = len(ans)-1
|
||||
while idx <= idx_max:
|
||||
if True: #self.verbose
|
||||
self.log("Section %s: %d articles" % (ans[idx][0], len(ans[idx][1])) )
|
||||
for article in ans[idx][1]:
|
||||
total_article_count += 1
|
||||
if True: #self.verbose
|
||||
self.log("\t%-40.40s... \t%-60.60s..." % (article['title'].encode('cp1252','replace'),
|
||||
article['url'].replace('http://m.newyorker.com','').encode('cp1252','replace')))
|
||||
idx = idx+1
|
||||
self.log( "Queued %d articles" % total_article_count )
|
||||
return ans
|
||||
|
||||
|
||||
def parse_index(self):
|
||||
ans = []
|
||||
try:
|
||||
soup = self.index_to_soup(self.newyorker_prefix)
|
||||
except:
|
||||
return ans
|
||||
seclist = self.load_global_nav(soup)
|
||||
ans.append(('Front Page',self.load_index_page(soup)))
|
||||
for (sectitle,securl) in seclist:
|
||||
ans.append((sectitle,self.load_global_section(securl)))
|
||||
return self.filter_ans(ans)
|
||||
|
||||
|
@ -12,6 +12,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
|
||||
max_articles_per_feed = 20
|
||||
#auto_cleanup = True
|
||||
language = 'en_GB'
|
||||
compress_news_images = True
|
||||
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup('http://www.nme.com/component/subscribe')
|
||||
@ -27,7 +28,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
|
||||
br.open_novisit(cov2)
|
||||
cover_url = str(cov2)
|
||||
except:
|
||||
cover_url = 'http://tawanda3000.files.wordpress.com/2011/02/nme-logo.jpg'
|
||||
cover_url = 'http://tawanda3000.files.wordpress.com/2011/02/nme-logo.jpg'
|
||||
return cover_url
|
||||
|
||||
masthead_url = 'http://tawanda3000.files.wordpress.com/2011/02/nme-logo.jpg'
|
||||
|
@ -11,7 +11,8 @@ class PsychologyToday(BasicNewsRecipe):
|
||||
language = 'en'
|
||||
category = 'news'
|
||||
encoding = 'UTF-8'
|
||||
keep_only_tags = [dict(attrs={'class':['print-title', 'print-submitted', 'print-content', 'print-footer', 'print-source_url', 'print-links']})]
|
||||
auto_cleanup = True
|
||||
#keep_only_tags = [dict(attrs={'class':['print-title', 'print-submitted', 'print-content', 'print-footer', 'print-source_url', 'print-links']})]
|
||||
no_javascript = True
|
||||
no_stylesheets = True
|
||||
|
||||
@ -31,50 +32,32 @@ class PsychologyToday(BasicNewsRecipe):
|
||||
self.timefmt = u' [%s]'%date
|
||||
|
||||
articles = []
|
||||
for post in div.findAll('div', attrs={'class':'collections-node-feature-info'}):
|
||||
for post in div.findAll('div', attrs={'class':'collections-node-feature collection-node-even'}):
|
||||
title = self.tag_to_string(post.find('h2'))
|
||||
author_item=post.find('div', attrs={'class':'collection-node-byline'})
|
||||
author = re.sub(r'.*by\s',"",self.tag_to_string(author_item).strip())
|
||||
title = title + u' (%s)'%author
|
||||
article_page= self.index_to_soup('http://www.psychologytoday.com'+post.find('a', href=True)['href'])
|
||||
print_page=article_page.find('li', attrs={'class':'print_html first'})
|
||||
url='http://www.psychologytoday.com'+print_page.find('a',href=True)['href']
|
||||
url= 'http://www.psychologytoday.com'+post.find('a', href=True)['href']
|
||||
#print_page=article_page.find('li', attrs={'class':'print_html first'})
|
||||
#url='http://www.psychologytoday.com'+print_page.find('a',href=True)['href']
|
||||
desc = self.tag_to_string(post.find('div', attrs={'class':'collection-node-description'})).strip()
|
||||
self.log('Found article:', title)
|
||||
self.log('\t', url)
|
||||
self.log('\t', desc)
|
||||
articles.append({'title':title, 'url':url, 'date':'','description':desc})
|
||||
for post in div.findAll('div', attrs={'class':'collections-node-feature collection-node-odd'}):
|
||||
title = self.tag_to_string(post.find('h2'))
|
||||
author_item=post.find('div', attrs={'class':'collection-node-byline'})
|
||||
author = re.sub(r'.*by\s',"",self.tag_to_string(author_item).strip())
|
||||
title = title + u' (%s)'%author
|
||||
url= 'http://www.psychologytoday.com'+post.find('a', href=True)['href']
|
||||
#print_page=article_page.find('li', attrs={'class':'print_html first'})
|
||||
#url='http://www.psychologytoday.com'+print_page.find('a',href=True)['href']
|
||||
desc = self.tag_to_string(post.find('div', attrs={'class':'collection-node-description'})).strip()
|
||||
self.log('Found article:', title)
|
||||
self.log('\t', url)
|
||||
self.log('\t', desc)
|
||||
articles.append({'title':title, 'url':url, 'date':'','description':desc})
|
||||
|
||||
for post in div.findAll('div', attrs={'class':'collections-node-thumbnail-info'}):
|
||||
title = self.tag_to_string(post.find('h2'))
|
||||
author_item=post.find('div', attrs={'class':'collection-node-byline'})
|
||||
article_page= self.index_to_soup('http://www.psychologytoday.com'+post.find('a', href=True)['href'])
|
||||
print_page=article_page.find('li', attrs={'class':'print_html first'})
|
||||
description = post.find('div', attrs={'class':'collection-node-description'})
|
||||
author = re.sub(r'.*by\s',"",self.tag_to_string(description.nextSibling).strip())
|
||||
desc = self.tag_to_string(description).strip()
|
||||
url='http://www.psychologytoday.com'+print_page.find('a',href=True)['href']
|
||||
title = title + u' (%s)'%author
|
||||
self.log('Found article:', title)
|
||||
self.log('\t', url)
|
||||
self.log('\t', desc)
|
||||
articles.append({'title':title, 'url':url, 'date':'','description':desc})
|
||||
|
||||
for post in div.findAll('li', attrs={'class':['collection-item-list-odd','collection-item-list-even']}):
|
||||
title = self.tag_to_string(post.find('h2'))
|
||||
author_item=post.find('div', attrs={'class':'collection-node-byline'})
|
||||
author = re.sub(r'.*by\s',"",self.tag_to_string(author_item).strip())
|
||||
title = title + u' (%s)'%author
|
||||
article_page= self.index_to_soup('http://www.psychologytoday.com'+post.find('a', href=True)['href'])
|
||||
print_page=article_page.find('li', attrs={'class':'print_html first'})
|
||||
if print_page is not None:
|
||||
url='http://www.psychologytoday.com'+print_page.find('a',href=True)['href']
|
||||
desc = self.tag_to_string(post.find('div', attrs={'class':'collection-node-description'})).strip()
|
||||
self.log('Found article:', title)
|
||||
self.log('\t', url)
|
||||
self.log('\t', desc)
|
||||
articles.append({'title':title, 'url':url, 'date':'','description':desc})
|
||||
|
||||
return [('Current Issue', articles)]
|
||||
|
||||
|
||||
|
@ -7,7 +7,6 @@ sfgate.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import re
|
||||
|
||||
class SanFranciscoChronicle(BasicNewsRecipe):
|
||||
title = u'San Francisco Chronicle'
|
||||
@ -19,16 +18,7 @@ class SanFranciscoChronicle(BasicNewsRecipe):
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
|
||||
|
||||
|
||||
remove_tags_before = {'id':'printheader'}
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div',attrs={'id':'printheader'})
|
||||
,dict(name='a', attrs={'href':re.compile('http://ads\.pheedo\.com.*')})
|
||||
,dict(name='div',attrs={'id':'footer'})
|
||||
]
|
||||
auto_cleanup = True
|
||||
|
||||
extra_css = '''
|
||||
h1{font-family :Arial,Helvetica,sans-serif; font-size:large;}
|
||||
@ -43,33 +33,13 @@ class SanFranciscoChronicle(BasicNewsRecipe):
|
||||
'''
|
||||
|
||||
feeds = [
|
||||
(u'Top News Stories', u'http://www.sfgate.com/rss/feeds/news.xml')
|
||||
(u'Bay Area News', u'http://www.sfgate.com/bayarea/feed/Bay-Area-News-429.php'),
|
||||
(u'City Insider', u'http://www.sfgate.com/default/feed/City-Insider-Blog-573.php'),
|
||||
(u'Crime Scene', u'http://www.sfgate.com/rss/feed/Crime-Scene-Blog-599.php'),
|
||||
(u'Education News', u'http://www.sfgate.com/education/feed/Education-News-from-SFGate-430.php'),
|
||||
(u'National News', u'http://www.sfgate.com/rss/feed/National-News-RSS-Feed-435.php'),
|
||||
(u'Weird News', u'http://www.sfgate.com/weird/feed/Weird-News-RSS-Feed-433.php'),
|
||||
(u'World News', u'http://www.sfgate.com/rss/feed/World-News-From-SFGate-432.php'),
|
||||
]
|
||||
|
||||
def print_version(self,url):
|
||||
url= url +"&type=printable"
|
||||
return url
|
||||
|
||||
def get_article_url(self, article):
|
||||
print str(article['title_detail']['value'])
|
||||
url = article.get('guid',None)
|
||||
url = "http://www.sfgate.com/cgi-bin/article.cgi?f="+url
|
||||
if "Presented By:" in str(article['title_detail']['value']):
|
||||
url = ''
|
||||
return url
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -50,6 +50,10 @@ class ScienceNewsIssue(BasicNewsRecipe):
|
||||
dict(name='ul', attrs={'id':'toc'})
|
||||
]
|
||||
|
||||
remove_tags= [ dict(name='a', attrs={'class':'enlarge print-no'}),
|
||||
dict(name='a', attrs={'rel':'shadowbox'})
|
||||
]
|
||||
|
||||
feeds = [(u"Science News Current Issues", u'http://www.sciencenews.org/view/feed/type/edition/name/issues.rss')]
|
||||
|
||||
match_regexps = [
|
||||
@ -57,6 +61,12 @@ class ScienceNewsIssue(BasicNewsRecipe):
|
||||
r'www.sciencenews.org/view/generic/id'
|
||||
]
|
||||
|
||||
    def image_url_processor(self, baseurl, url):
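        # Scaled preview URLs are rewritten to the full-size download URL, using the
        # image id and file name taken from the original URL's path segments.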
        x = url.split('/')
        if x[4] == u'scale':
            url = u'http://www.sciencenews.org/view/download/id/' + x[6] + u'/name/' + x[-1]
        return url
|
||||
|
||||
def get_cover_url(self):
|
||||
cover_url = None
|
||||
index = 'http://www.sciencenews.org/view/home'
|
||||
@ -64,7 +74,6 @@ class ScienceNewsIssue(BasicNewsRecipe):
|
||||
link_item = soup.find(name = 'img',alt = "issue")
|
||||
if link_item:
|
||||
cover_url = 'http://www.sciencenews.org' + link_item['src'] + '.jpg'
|
||||
|
||||
return cover_url
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
|
70
recipes/sportowefakty.recipe
Normal file
@ -0,0 +1,70 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.utils.magick import Image
|
||||
|
||||
class sportowefakty(BasicNewsRecipe):
|
||||
title = u'SportoweFakty'
|
||||
__author__ = 'Artur Stachecki <artur.stachecki@gmail.com>, Tomasz Długosz <tomek3d@gmail.com>'
|
||||
language = 'pl'
|
||||
description = u'Najważniejsze informacje sportowe z kraju i ze świata, relacje, komentarze, wywiady, zdjęcia!'
|
||||
oldest_article = 1
|
||||
masthead_url='http://www.sportowefakty.pl/images/logo.png'
|
||||
max_articles_per_feed = 100
|
||||
simultaneous_downloads = 5
|
||||
use_embedded_content=False
|
||||
remove_javascript=True
|
||||
no_stylesheets=True
|
||||
ignore_duplicate_articles = {'title', 'url'}
|
||||
|
||||
keep_only_tags = [dict(attrs = {'class' : 'box-article'})]
|
||||
remove_tags =[]
|
||||
remove_tags.append(dict(attrs = {'class' : re.compile(r'^newsStream')}))
|
||||
remove_tags.append(dict(attrs = {'target' : '_blank'}))
|
||||
|
||||
feeds = [
|
||||
(u'Piłka Nożna', u'http://www.sportowefakty.pl/pilka-nozna/index.rss'),
|
||||
(u'Koszykówka', u'http://www.sportowefakty.pl/koszykowka/index.rss'),
|
||||
(u'Żużel', u'http://www.sportowefakty.pl/zuzel/index.rss'),
|
||||
(u'Siatkówka', u'http://www.sportowefakty.pl/siatkowka/index.rss'),
|
||||
(u'Zimowe', u'http://www.sportowefakty.pl/zimowe/index.rss'),
|
||||
(u'Hokej', u'http://www.sportowefakty.pl/hokej/index.rss'),
|
||||
(u'Moto', u'http://www.sportowefakty.pl/moto/index.rss'),
|
||||
(u'Tenis', u'http://www.sportowefakty.pl/tenis/index.rss')
|
||||
]
|
||||
|
||||
def get_article_url(self, article):
|
||||
link = article.get('link', None)
|
||||
if 'utm_source' in link:
|
||||
return link.split('?utm')[0]
|
||||
else:
|
||||
return link
|
||||
|
||||
def print_version(self, url):
|
||||
print_url = url + '/drukuj'
|
||||
return print_url
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
head = soup.find('h1')
|
||||
if 'Fotorelacja' in self.tag_to_string(head):
|
||||
return None
|
||||
else:
|
||||
for alink in soup.findAll('a'):
|
||||
if alink.string is not None:
|
||||
tstr = alink.string
|
||||
alink.replaceWith(tstr)
|
||||
return soup
|
||||
|
||||
    def postprocess_html(self, soup, first):
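        # Re-save every downloaded article image in place as grayscale via
        # calibre's ImageMagick wrapper.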
        for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
            iurl = tag['src']
            img = Image()
            img.open(iurl)
            if img < 0:
                raise RuntimeError('Out of memory')
            img.type = "GrayscaleType"
            img.save(iurl)
        return soup
|
@ -1,8 +1,11 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'Lorenzo Vigentini'
|
||||
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
||||
description = 'the Escapist Magazine - v1.02 (09, January 2010)'
|
||||
__author__ = 'Lorenzo Vigentini and Tom Surace'
|
||||
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>, 2013 Tom Surace <tekhedd@byteheaven.net>'
|
||||
description = 'The Escapist Magazine - v1.3 (2013, April 2013)'
|
||||
|
||||
#
|
||||
# Based on 'the Escapist Magazine - v1.02 (09, January 2010)'
|
||||
|
||||
'''
|
||||
http://www.escapistmagazine.com/
|
||||
@ -11,12 +14,11 @@ http://www.escapistmagazine.com/
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class al(BasicNewsRecipe):
|
||||
author = 'Lorenzo Vigentini'
|
||||
author = 'Lorenzo Vigentini and Tom Surace'
|
||||
description = 'The Escapist Magazine'
|
||||
|
||||
cover_url = 'http://cdn.themis-media.com/themes/escapistmagazine/default/images/logo.png'
|
||||
title = u'The Escapist Magazine'
|
||||
publisher = 'Themis media'
|
||||
publisher = 'Themis Media'
|
||||
category = 'Video games news, lifestyle, gaming culture'
|
||||
|
||||
language = 'en'
|
||||
@ -36,18 +38,19 @@ class al(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
def print_version(self,url):
|
||||
# Expect article url in the format:
|
||||
# http://www.escapistmagazine.com/news/view/123198-article-name?utm_source=rss&utm_medium=rss&utm_campaign=news
|
||||
#
|
||||
baseURL='http://www.escapistmagazine.com'
|
||||
segments = url.split('/')
|
||||
#basename = '/'.join(segments[:3]) + '/'
|
||||
subPath= '/'+ segments[3] + '/'
|
||||
articleURL=(segments[len(segments)-1])[0:5]
|
||||
|
||||
if articleURL[4] =='-':
|
||||
articleURL=articleURL[:4]
|
||||
# The article number is the "number" that starts the name
|
||||
articleNumber = segments[len(segments)-1]  # the "article name"
|
||||
articleNumber = articleNumber.split('-')[0]  # keep part before hyphen
|
||||
|
||||
printVerString='print/'+ articleURL
|
||||
s= baseURL + subPath + printVerString
|
||||
return s
|
||||
fullUrl = baseURL + subPath + 'print/' + articleNumber
|
||||
return fullUrl
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'id':'article'})
|
||||
|
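The rewritten print_version above keeps only the numeric prefix of the last URL segment and rebuilds the print URL from it. A standalone sketch of the same logic:

# Illustrative reimplementation of the print_version logic shown above.
def escapist_print_version(url, base='http://www.escapistmagazine.com'):
    segments = url.split('/')
    sub_path = '/' + segments[3] + '/'            # e.g. '/news/'
    article_number = segments[-1].split('-')[0]   # digits before the first hyphen
    return base + sub_path + 'print/' + article_number

# escapist_print_version('http://www.escapistmagazine.com/news/view/123198-article-name')
# -> 'http://www.escapistmagazine.com/news/print/123198'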
11
recipes/the_feature.recipe
Normal file
@ -0,0 +1,11 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1365777047(BasicNewsRecipe):
|
||||
title = u'The Feature'
|
||||
__author__ = 'Jose Pinto'
|
||||
language = 'en'
|
||||
oldest_article = 30
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
use_embedded_content = False
|
||||
feeds = [(u'Latest', u'http://thefeature.net/rss/links')]
|
@ -20,7 +20,7 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
|
||||
ignore_duplicate_articles = {'title','url'}
|
||||
|
||||
compress_news_images = True
|
||||
|
||||
extra_css = '''
|
||||
body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
|
||||
|
@ -1,5 +1,5 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2009-2013, Darko Miletic <darko.miletic at gmail.com>'
|
||||
|
||||
'''
|
||||
theonion.com
|
||||
@ -10,7 +10,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class TheOnion(BasicNewsRecipe):
|
||||
title = 'The Onion'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = "America's finest news source"
|
||||
description = "The Onion, America's Finest News Source, is an award-winning publication covering world, national, and * local issues. It is updated daily online and distributed weekly in select American cities."
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
publisher = 'Onion, Inc.'
|
||||
@ -20,7 +20,8 @@ class TheOnion(BasicNewsRecipe):
|
||||
use_embedded_content = False
|
||||
encoding = 'utf-8'
|
||||
publication_type = 'newsportal'
|
||||
masthead_url = 'http://o.onionstatic.com/img/headers/onion_190.png'
|
||||
needs_subscription = 'optional'
|
||||
masthead_url = 'http://www.theonion.com/static/onion/img/logo_1x.png'
|
||||
extra_css = """
|
||||
body{font-family: Helvetica,Arial,sans-serif}
|
||||
.section_title{color: gray; text-transform: uppercase}
|
||||
@ -37,18 +38,12 @@ class TheOnion(BasicNewsRecipe):
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='h2', attrs={'class':['section_title','title']})
|
||||
,dict(attrs={'class':['main_image','meta','article_photo_lead','article_body']})
|
||||
,dict(attrs={'id':['entries']})
|
||||
]
|
||||
remove_attributes=['lang','rel']
|
||||
remove_tags_after = dict(attrs={'class':['article_body','feature_content']})
|
||||
remove_tags = [
|
||||
dict(name=['object','link','iframe','base','meta'])
|
||||
,dict(name='div', attrs={'class':['toolbar_side','graphical_feature','toolbar_bottom']})
|
||||
,dict(name='div', attrs={'id':['recent_slider','sidebar','pagination','related_media']})
|
||||
]
|
||||
keep_only_tags = [dict(attrs={'class':'full-article'})]
|
||||
remove_attributes = ['lang','rel']
|
||||
remove_tags = [
|
||||
dict(name=['object','link','iframe','base','meta'])
|
||||
,dict(attrs={'class':lambda x: x and 'share-tools' in x.split()})
|
||||
]
|
||||
|
||||
|
||||
feeds = [
|
||||
@ -56,6 +51,17 @@ class TheOnion(BasicNewsRecipe):
|
||||
,(u'Sports' , u'http://feeds.theonion.com/theonion/sports' )
|
||||
]
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
br.open('http://www.theonion.com/')
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open('https://ui.ppjol.com/login/onion/u/j_spring_security_check')
|
||||
br.select_form(name='f')
|
||||
br['j_username'] = self.username
|
||||
br['j_password'] = self.password
|
||||
br.submit()
|
||||
return br
|
||||
|
||||
def get_article_url(self, article):
|
||||
artl = BasicNewsRecipe.get_article_url(self, article)
|
||||
if artl.startswith('http://www.theonion.com/audio/'):
|
||||
@ -79,4 +85,8 @@ class TheOnion(BasicNewsRecipe):
|
||||
else:
|
||||
str = self.tag_to_string(item)
|
||||
item.replaceWith(str)
|
||||
for item in soup.findAll('img'):
|
||||
if item.has_key('data-src'):
|
||||
item['src'] = item['data-src']
|
||||
return soup
|
||||
|
||||
|
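Two techniques in the updated Onion recipe above are worth noting: the login is only attempted in get_browser when credentials are actually set, and lazy-loaded images carry their real URL in data-src, which has to be promoted to src before download. A short sketch of the image handling:

# Sketch of the lazy-image fix used above: copy data-src into src so the
# downloader fetches the real image instead of a placeholder.
def fix_lazy_images(soup):
    for img in soup.findAll('img'):
        if img.get('data-src'):
            img['src'] = img['data-src']
    return soup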
@ -1,7 +1,5 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2013, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
tomshardware.com/us
|
||||
'''
|
||||
@ -16,22 +14,20 @@ class Tomshardware(BasicNewsRecipe):
|
||||
publisher = "Tom's Hardware"
|
||||
category = 'news, IT, hardware, USA'
|
||||
no_stylesheets = True
|
||||
needs_subscription = True
|
||||
language = 'en'
|
||||
|
||||
needs_subscription = 'optional'
|
||||
language = 'en'
|
||||
INDEX = 'http://www.tomshardware.com'
|
||||
LOGIN = INDEX + '/membres/'
|
||||
remove_javascript = True
|
||||
use_embedded_content= False
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
br.open(self.INDEX+'/us/')
|
||||
@ -50,8 +46,8 @@ class Tomshardware(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Latest Articles', u'http://www.tomshardware.com/feeds/atom/tom-s-hardware-us,18-2.xml' )
|
||||
,(u'Latest News' , u'http://www.tomshardware.com/feeds/atom/tom-s-hardware-us,18-1.xml')
|
||||
(u'Reviews', u'http://www.tomshardware.com/feeds/rss2/tom-s-hardware-us,18-2.xml')
|
||||
,(u'News' , u'http://www.tomshardware.com/feeds/rss2/tom-s-hardware-us,18-1.xml')
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
|
@ -1,5 +1,6 @@
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class TVXS(BasicNewsRecipe):
|
||||
@ -8,19 +9,30 @@ class TVXS(BasicNewsRecipe):
|
||||
description = 'News from Greece'
|
||||
max_articles_per_feed = 100
|
||||
oldest_article = 3
|
||||
simultaneous_downloads = 1
|
||||
publisher = 'TVXS'
|
||||
category = 'news, GR'
|
||||
category = 'news, sport, greece'
|
||||
language = 'el'
|
||||
encoding = None
|
||||
use_embedded_content = False
|
||||
remove_empty_feeds = True
|
||||
#conversion_options = { 'linearize_tables': True}
|
||||
conversion_options = {'smarten_punctuation': True}
|
||||
no_stylesheets = True
|
||||
publication_type = 'newspaper'
|
||||
remove_tags_before = dict(name='h1',attrs={'class':'print-title'})
|
||||
remove_tags_after = dict(name='div',attrs={'class':'field field-type-relevant-content field-field-relevant-articles'})
|
||||
remove_attributes = ['width', 'src', 'header', 'footer']
|
||||
|
||||
remove_tags = [dict(name='div',attrs={'class':'field field-type-relevant-content field-field-relevant-articles'}),
|
||||
dict(name='div',attrs={'class':'field field-type-filefield field-field-image-gallery'}),
|
||||
dict(name='div',attrs={'class':'filefield-file'})]
|
||||
remove_attributes = ['border', 'cellspacing', 'align', 'cellpadding', 'colspan', 'valign', 'vspace', 'hspace', 'alt', 'width', 'height']
|
||||
extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
|
||||
table { width: 100%; } \
|
||||
td img { display: block; margin: 5px auto; } \
|
||||
ul { padding-top: 10px; } \
|
||||
ol { padding-top: 10px; } \
|
||||
li { padding-top: 5px; padding-bottom: 5px; } \
|
||||
h1 { text-align: center; font-size: 125%; font-weight: bold; } \
|
||||
h2, h3, h4, h5, h6 { text-align: center; font-size: 100%; font-weight: bold; }'
|
||||
preprocess_regexps = [(re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda m: ''), (re.compile(r'<br[ ]*clear.*/>', re.IGNORECASE), lambda m: '')]
|
||||
|
||||
feeds = [(u'Ελλάδα', 'http://tvxs.gr/feeds/2/feed.xml'),
|
||||
(u'Κόσμος', 'http://tvxs.gr/feeds/5/feed.xml'),
|
||||
@ -35,17 +47,10 @@ class TVXS(BasicNewsRecipe):
|
||||
(u'Ιστορία', 'http://tvxs.gr/feeds/1573/feed.xml'),
|
||||
(u'Χιούμορ', 'http://tvxs.gr/feeds/692/feed.xml')]
|
||||
|
||||
|
||||
def print_version(self, url):
|
||||
import urllib2, urlparse, StringIO, gzip
|
||||
|
||||
fp = urllib2.urlopen(url)
|
||||
data = fp.read()
|
||||
if fp.info()['content-encoding'] == 'gzip':
|
||||
gzip_data = StringIO.StringIO(data)
|
||||
gzipper = gzip.GzipFile(fileobj=gzip_data)
|
||||
data = gzipper.read()
|
||||
fp.close()
|
||||
br = self.get_browser()
|
||||
response = br.open(url)
|
||||
data = response.read()
|
||||
|
||||
pos_1 = data.find('<a href="/print/')
|
||||
if pos_1 == -1:
|
||||
@ -57,5 +62,5 @@ class TVXS(BasicNewsRecipe):
|
||||
pos_1 += len('<a href="')
|
||||
new_url = data[pos_1:pos_2]
|
||||
|
||||
print_url = urlparse.urljoin(url, new_url)
|
||||
print_url = "http://tvxs.gr" + new_url
|
||||
return print_url
|
||||
|
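The TVXS change above drops the hand-rolled urllib2/gzip handling in favour of the recipe's own browser, then locates the print link by scanning the raw HTML with string offsets. A hedged alternative sketch that does the same lookup with BeautifulSoup (hypothetical helper, not part of the recipe):

# Hypothetical alternative: find the '/print/...' link via BeautifulSoup
# instead of manual find() offsets.
import re

def find_print_url(self, url):
    soup = self.index_to_soup(url)
    a = soup.find('a', href=re.compile(r'^/print/'))
    if a is None:
        return url                      # fall back to the normal article page
    return 'http://tvxs.gr' + a['href']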
17
recipes/universe_today.recipe
Normal file
@ -0,0 +1,17 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class UniverseToday(BasicNewsRecipe):
|
||||
title = u'Universe Today'
|
||||
language = 'en'
|
||||
description = u'Space and astronomy news.'
|
||||
__author__ = 'seird'
|
||||
publisher = u'universetoday.com'
|
||||
category = 'science, astronomy, news, rss'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 40
|
||||
auto_cleanup = True
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
remove_empty_feeds = True
|
||||
|
||||
feeds = [(u'Universe Today', u'http://feeds.feedburner.com/universetoday/pYdq')]
|
@ -6,17 +6,62 @@ __license__ = 'GPL v3'
|
||||
www.canada.com
|
||||
'''
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
from calibre.ebooks.BeautifulSoup import Tag, BeautifulStoneSoup
|
||||
|
||||
|
||||
class TimesColonist(BasicNewsRecipe):
|
||||
|
||||
# Customization -- remove sections you don't want.
|
||||
# If your e-reader is an e-ink Kindle and your output profile is
|
||||
# set properly this recipe will not include images because the
|
||||
# resulting file is too large. If you have one of these and want
|
||||
# images you can set kindle_omit_images = False
|
||||
# and remove sections (typically the e-ink Kindles will
|
||||
# work with about a dozen of these, but your mileage may vary).
|
||||
|
||||
kindle_omit_images = True
|
||||
|
||||
section_list = [
|
||||
('','Web Front Page'),
|
||||
('news/','News Headlines'),
|
||||
('news/b-c/','BC News'),
|
||||
('news/national/','National News'),
|
||||
('news/world/','World News'),
|
||||
('opinion/','Opinion'),
|
||||
('opinion/letters/','Letters'),
|
||||
('business/','Business'),
|
||||
('business/money/','Money'),
|
||||
('business/technology/','Technology'),
|
||||
('business/working/','Working'),
|
||||
('sports/','Sports'),
|
||||
('sports/hockey/','Hockey'),
|
||||
('sports/football/','Football'),
|
||||
('sports/basketball/','Basketball'),
|
||||
('sports/golf/','Golf'),
|
||||
('entertainment/','entertainment'),
|
||||
('entertainment/go/','Go!'),
|
||||
('entertainment/music/','Music'),
|
||||
('entertainment/books/','Books'),
|
||||
('entertainment/Movies/','Movies'),
|
||||
('entertainment/television/','Television'),
|
||||
('life/','Life'),
|
||||
('life/health/','Health'),
|
||||
('life/travel/','Travel'),
|
||||
('life/driving/','Driving'),
|
||||
('life/homes/','Homes'),
|
||||
('life/food-drink/','Food & Drink')
|
||||
]
|
||||
|
||||
title = u'Victoria Times Colonist'
|
||||
url_prefix = 'http://www.timescolonist.com'
|
||||
description = u'News from Victoria, BC'
|
||||
fp_tag = 'CAN_TC'
|
||||
|
||||
masthead_url = 'http://www.timescolonist.com/gmg/img/global/logoTimesColonist.png'
|
||||
|
||||
|
||||
url_list = []
|
||||
language = 'en_CA'
|
||||
__author__ = 'Nick Redding'
|
||||
@ -29,15 +74,21 @@ class TimesColonist(BasicNewsRecipe):
|
||||
.caption { font-size: xx-small; font-style: italic; font-weight: normal; }
|
||||
'''
|
||||
keep_only_tags = [dict(name='div', attrs={'class':re.compile('main.content')})]
|
||||
remove_tags = [{'class':'comments'},
|
||||
{'id':'photocredit'},
|
||||
dict(name='div', attrs={'class':re.compile('top.controls')}),
|
||||
dict(name='div', attrs={'class':re.compile('social')}),
|
||||
dict(name='div', attrs={'class':re.compile('tools')}),
|
||||
dict(name='div', attrs={'class':re.compile('bottom.tools')}),
|
||||
dict(name='div', attrs={'class':re.compile('window')}),
|
||||
dict(name='div', attrs={'class':re.compile('related.news.element')})]
|
||||
|
||||
def __init__(self, options, log, progress_reporter):
|
||||
self.remove_tags = [{'class':'comments'},
|
||||
{'id':'photocredit'},
|
||||
dict(name='div', attrs={'class':re.compile('top.controls')}),
|
||||
dict(name='div', attrs={'class':re.compile('^comments')}),
|
||||
dict(name='div', attrs={'class':re.compile('social')}),
|
||||
dict(name='div', attrs={'class':re.compile('tools')}),
|
||||
dict(name='div', attrs={'class':re.compile('bottom.tools')}),
|
||||
dict(name='div', attrs={'class':re.compile('window')}),
|
||||
dict(name='div', attrs={'class':re.compile('related.news.element')})]
|
||||
print("PROFILE NAME = "+options.output_profile.short_name)
|
||||
if self.kindle_omit_images and options.output_profile.short_name in ['kindle', 'kindle_dx', 'kindle_pw']:
|
||||
self.remove_tags.append(dict(name='div', attrs={'class':re.compile('image-container')}))
|
||||
BasicNewsRecipe.__init__(self, options, log, progress_reporter)
|
||||
|
||||
def get_cover_url(self):
|
||||
from datetime import timedelta, date
|
||||
@ -122,7 +173,6 @@ class TimesColonist(BasicNewsRecipe):
|
||||
def preprocess_html(self,soup):
|
||||
byline = soup.find('p',attrs={'class':re.compile('ancillary')})
|
||||
if byline is not None:
|
||||
byline.find('a')
|
||||
authstr = self.tag_to_string(byline,False)
|
||||
authstr = re.sub('/ *Times Colonist','/',authstr, flags=re.IGNORECASE)
|
||||
authstr = re.sub('BY */','',authstr, flags=re.IGNORECASE)
|
||||
@ -149,9 +199,10 @@ class TimesColonist(BasicNewsRecipe):
|
||||
atag = htag.a
|
||||
if atag is not None:
|
||||
url = atag['href']
|
||||
#print("Checking "+url)
|
||||
if atag['href'].startswith('/'):
|
||||
url = self.url_prefix+atag['href']
|
||||
url = url.strip()
|
||||
# print("Checking >>"+url+'<<\n\r')
|
||||
if url.startswith('/'):
|
||||
url = self.url_prefix+url
|
||||
if url in self.url_list:
|
||||
return
|
||||
self.url_list.append(url)
|
||||
@ -171,10 +222,10 @@ class TimesColonist(BasicNewsRecipe):
|
||||
if dtag is not None:
|
||||
description = self.tag_to_string(dtag,False)
|
||||
article_list.append(dict(title=title,url=url,date='',description=description,author='',content=''))
|
||||
#print(sectitle+title+": description = "+description+" URL="+url)
|
||||
print(sectitle+title+": description = "+description+" URL="+url+'\n\r')
|
||||
|
||||
def add_section_index(self,ans,securl,sectitle):
|
||||
print("Add section url="+self.url_prefix+'/'+securl)
|
||||
print("Add section url="+self.url_prefix+'/'+securl+'\n\r')
|
||||
try:
|
||||
soup = self.index_to_soup(self.url_prefix+'/'+securl)
|
||||
except:
|
||||
@ -193,33 +244,7 @@ class TimesColonist(BasicNewsRecipe):
|
||||
|
||||
def parse_index(self):
|
||||
ans = []
|
||||
ans = self.add_section_index(ans,'','Web Front Page')
|
||||
ans = self.add_section_index(ans,'news/','News Headlines')
|
||||
ans = self.add_section_index(ans,'news/b-c/','BC News')
|
||||
ans = self.add_section_index(ans,'news/national/','National News')
|
||||
ans = self.add_section_index(ans,'news/world/','World News')
|
||||
ans = self.add_section_index(ans,'opinion/','Opinion')
|
||||
ans = self.add_section_index(ans,'opinion/letters/','Letters')
|
||||
ans = self.add_section_index(ans,'business/','Business')
|
||||
ans = self.add_section_index(ans,'business/money/','Money')
|
||||
ans = self.add_section_index(ans,'business/technology/','Technology')
|
||||
ans = self.add_section_index(ans,'business/working/','Working')
|
||||
ans = self.add_section_index(ans,'sports/','Sports')
|
||||
ans = self.add_section_index(ans,'sports/hockey/','Hockey')
|
||||
ans = self.add_section_index(ans,'sports/football/','Football')
|
||||
ans = self.add_section_index(ans,'sports/basketball/','Basketball')
|
||||
ans = self.add_section_index(ans,'sports/golf/','Golf')
|
||||
ans = self.add_section_index(ans,'entertainment/','entertainment')
|
||||
ans = self.add_section_index(ans,'entertainment/go/','Go!')
|
||||
ans = self.add_section_index(ans,'entertainment/music/','Music')
|
||||
ans = self.add_section_index(ans,'entertainment/books/','Books')
|
||||
ans = self.add_section_index(ans,'entertainment/Movies/','movies')
|
||||
ans = self.add_section_index(ans,'entertainment/television/','Television')
|
||||
ans = self.add_section_index(ans,'life/','Life')
|
||||
ans = self.add_section_index(ans,'life/health/','Health')
|
||||
ans = self.add_section_index(ans,'life/travel/','Travel')
|
||||
ans = self.add_section_index(ans,'life/driving/','Driving')
|
||||
ans = self.add_section_index(ans,'life/homes/','Homes')
|
||||
ans = self.add_section_index(ans,'life/food-drink/','Food & Drink')
|
||||
for (url,title) in self.section_list:
|
||||
ans = self.add_section_index(ans,url,title)
|
||||
return ans
|
||||
|
||||
|
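The Times Colonist rewrite above turns the long hand-written parse_index into a data-driven loop over section_list, and trims images inside __init__ when the output profile is an e-ink Kindle. Customizing the recipe then only means editing the table, for example:

# Sketch: a trimmed-down customization of the recipe above.
# Keep only a few sections and allow images on a Kindle.
kindle_omit_images = False
section_list = [
    ('', 'Web Front Page'),
    ('news/', 'News Headlines'),
    ('sports/', 'Sports'),
    ('life/food-drink/', 'Food & Drink'),
]

def parse_index(self):
    ans = []
    for (url, title) in self.section_list:
        ans = self.add_section_index(ans, url, title)
    return ans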
27
recipes/voice_of_america.recipe
Normal file
@ -0,0 +1,27 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class HindustanTimes(BasicNewsRecipe):
|
||||
title = u'Voice of America'
|
||||
language = 'en'
|
||||
__author__ = 'Krittika Goyal'
|
||||
oldest_article = 15 #days
|
||||
max_articles_per_feed = 25
|
||||
#encoding = 'cp1252'
|
||||
use_embedded_content = False
|
||||
|
||||
no_stylesheets = True
|
||||
auto_cleanup = True
|
||||
|
||||
|
||||
feeds = [
|
||||
('All Zones',
|
||||
'http://learningenglish.voanews.com/rss/?count=20'),
|
||||
('World',
|
||||
'http://learningenglish.voanews.com/rss/?count=20&zoneid=957'),
|
||||
('USA',
|
||||
'http://learningenglish.voanews.com/rss/?count=20&zoneid=958'),
|
||||
('Health',
|
||||
'http://learningenglish.voanews.com/rss/?count=20&zoneid=955'),
|
||||
|
||||
]
|
||||
|
@ -1,144 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class GazetaWyborczaDuzyForma(BasicNewsRecipe):
|
||||
cover_url = 'http://bi.gazeta.pl/im/8/5415/m5415058.gif'
|
||||
title = u"Gazeta Wyborcza Duzy Format"
|
||||
__author__ = 'ravcio - rlelusz[at]gmail.com'
|
||||
description = u"Articles from Gazeta's website"
|
||||
language = 'pl'
|
||||
max_articles_per_feed = 50  # you can increase it even up to maybe 600, should still work
|
||||
recursions = 0
|
||||
encoding = 'iso-8859-2'
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'id':['k1']})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':['zdjM', 'rel_video', 'zdjP', 'rel_box', 'index mod_zi_dolStrony']})
|
||||
,dict(name='div', attrs={'id':['source', 'banP4', 'article_toolbar', 'rel', 'inContext_disabled']})
|
||||
,dict(name='ul', attrs={'id':['articleToolbar']})
|
||||
,dict(name='img', attrs={'class':['brand']})
|
||||
,dict(name='h5', attrs={'class':['author']})
|
||||
,dict(name='h6', attrs={'class':['date']})
|
||||
,dict(name='p', attrs={'class':['txt_upl']})
|
||||
]
|
||||
|
||||
remove_tags_after = [
|
||||
dict(name='div', attrs={'id':['Str']})  # page number navigator
|
||||
]
|
||||
|
||||
def load_article_links(self, url, count):
|
||||
print '--- load_article_links', url, count
|
||||
|
||||
#page with link to articles
|
||||
soup = self.index_to_soup(url)
|
||||
|
||||
#table with articles
|
||||
list = soup.find('div', attrs={'class':'GWdalt'})
|
||||
|
||||
#single articles (link, title, ...)
|
||||
links = list.findAll('div', attrs={'class':['GWdaltE']})
|
||||
|
||||
if len(links) < count:
|
||||
#load links to more articles...
|
||||
|
||||
#find the link to the next page of results
|
||||
pages_nav = list.find('div', attrs={'class':'pages'})
|
||||
next = pages_nav.find('a', attrs={'class':'next'})
|
||||
if next:
|
||||
print 'next=', next['href']
|
||||
url = 'http://wyborcza.pl' + next['href']
|
||||
#e.g. url = 'http://wyborcza.pl/0,75480.html?str=2'
|
||||
|
||||
older_links = self.load_article_links(url, count - len(links))
|
||||
links.extend(older_links)
|
||||
|
||||
return links
|
||||
|
||||
|
||||
#produce list of articles to download
|
||||
def parse_index(self):
|
||||
print '--- parse_index'
|
||||
|
||||
max_articles = 8000
|
||||
links = self.load_article_links('http://wyborcza.pl/0,75480.html', max_articles)
|
||||
|
||||
ans = []
|
||||
key = None
|
||||
articles = {}
|
||||
|
||||
key = 'Uncategorized'
|
||||
articles[key] = []
|
||||
|
||||
for div_art in links:
|
||||
div_date = div_art.find('div', attrs={'class':'kL'})
|
||||
div = div_art.find('div', attrs={'class':'kR'})
|
||||
|
||||
a = div.find('a', href=True)
|
||||
|
||||
url = a['href']
|
||||
title = a.string
|
||||
description = ''
|
||||
pubdate = div_date.string.rstrip().lstrip()
|
||||
summary = div.find('span', attrs={'class':'lead'})
|
||||
|
||||
desc = summary.find('a', href=True)
|
||||
if desc:
|
||||
desc.extract()
|
||||
|
||||
description = self.tag_to_string(summary, use_alt=False)
|
||||
description = description.rstrip().lstrip()
|
||||
|
||||
feed = key if key is not None else 'Duzy Format'
|
||||
|
||||
if not articles.has_key(feed):
|
||||
articles[feed] = []
|
||||
|
||||
if description != '':  # skip picture-only articles
|
||||
articles[feed].append(
|
||||
dict(title=title, url=url, date=pubdate,
|
||||
description=description,
|
||||
content=''))
|
||||
|
||||
ans = [(key, articles[key])]
|
||||
return ans
|
||||
|
||||
def append_page(self, soup, appendtag, position):
|
||||
pager = soup.find('div',attrs={'id':'Str'})
|
||||
if pager:
|
||||
#look for an 'a' element whose text contains 'nast' (next); if not found, stop
|
||||
list = pager.findAll('a')
|
||||
|
||||
for elem in list:
|
||||
if 'nast' in elem.string:
|
||||
nexturl = elem['href']
|
||||
|
||||
soup2 = self.index_to_soup('http://warszawa.gazeta.pl' + nexturl)
|
||||
|
||||
texttag = soup2.find('div', attrs={'id':'artykul'})
|
||||
|
||||
newpos = len(texttag.contents)
|
||||
self.append_page(soup2,texttag,newpos)
|
||||
texttag.extract()
|
||||
appendtag.insert(position,texttag)
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
self.append_page(soup, soup.body, 3)
|
||||
|
||||
# finally remove some tags
|
||||
pager = soup.find('div',attrs={'id':'Str'})
|
||||
if pager:
|
||||
pager.extract()
|
||||
|
||||
pager = soup.find('div',attrs={'class':'tylko_int'})
|
||||
if pager:
|
||||
pager.extract()
|
||||
|
||||
return soup
|
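The removed Duzy Format recipe relied on a recursive follow-the-"next"-link crawl to collect enough article entries before building the index. Reduced to a sketch (selectors are the ones from the deleted recipe above):

# Sketch of the recursive index crawl used by the removed recipe above.
def load_article_links(self, url, count):
    soup = self.index_to_soup(url)
    listing = soup.find('div', attrs={'class': 'GWdalt'})
    links = listing.findAll('div', attrs={'class': ['GWdaltE']})
    if len(links) < count:
        nxt = listing.find('div', attrs={'class': 'pages'}).find('a', attrs={'class': 'next'})
        if nxt:
            links.extend(self.load_article_links('http://wyborcza.pl' + nxt['href'],
                                                 count - len(links)))
    return links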
57
recipes/wysokie_obcasy.recipe
Normal file
@ -0,0 +1,57 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class WysokieObcasyRecipe(BasicNewsRecipe):
|
||||
__author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
|
||||
language = 'pl'
|
||||
version = 1
|
||||
|
||||
title = u'Wysokie Obcasy'
|
||||
publisher = 'Agora SA'
|
||||
description = u'Serwis sobotniego dodatku do Gazety Wyborczej'
|
||||
category='magazine'
|
||||
language = 'pl'
|
||||
publication_type = 'magazine'
|
||||
cover_url=''
|
||||
remove_empty_feeds= True
|
||||
no_stylesheets=True
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100000
|
||||
recursions = 0
|
||||
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
simultaneous_downloads = 5
|
||||
|
||||
keep_only_tags =[]
|
||||
keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'article'}))
|
||||
|
||||
remove_tags =[]
|
||||
remove_tags.append(dict(name = 'img'))
|
||||
remove_tags.append(dict(name = 'p', attrs = {'class' : 'info'}))
|
||||
|
||||
extra_css = '''
|
||||
body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
|
||||
h1{text-align: left;}
|
||||
'''
|
||||
|
||||
feeds = [
|
||||
('Wszystkie Artykuly', 'feed://www.wysokieobcasy.pl/pub/rss/wysokieobcasy.xml'),
|
||||
]
|
||||
|
||||
def print_version(self,url):
|
||||
baseURL='http://www.wysokieobcasy.pl/wysokie-obcasy'
|
||||
segments = url.split(',')
|
||||
subPath= '/2029020,'
|
||||
articleURL1 = segments[1]
|
||||
articleURL2 = segments[2]
|
||||
printVerString=articleURL1 + ',' + articleURL2
|
||||
s= baseURL + subPath + printVerString + '.html'
|
||||
return s
|
||||
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup('http://www.wysokieobcasy.pl/wysokie-obcasy/0,0.html')
|
||||
self.cover_url = soup.find(attrs={'class':'holder_cr'}).find('img')['src']
|
||||
return getattr(self, 'cover_url', self.cover_url)
|
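The print_version above relies on Gazeta article URLs carrying two comma-separated numeric ids; only those ids are reused to build the print URL. A worked example (the sample URL is illustrative):

# Illustrative walk-through of the print_version logic above.
url = 'http://www.wysokieobcasy.pl/wysokie-obcasy/1,96856,13637623,Some_Title.html'
segments = url.split(',')   # ['...-obcasy/1', '96856', '13637623', 'Some_Title.html']
print_url = ('http://www.wysokieobcasy.pl/wysokie-obcasy/2029020,'
             + segments[1] + ',' + segments[2] + '.html')
# -> 'http://www.wysokieobcasy.pl/wysokie-obcasy/2029020,96856,13637623.html'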
@ -357,7 +357,7 @@
|
||||
<xsl:apply-templates/>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="rtf:table">
|
||||
<xsl:template match="rtf:table">
|
||||
<xsl:element name="table">
|
||||
<xsl:attribute name="id">
|
||||
<xsl:value-of select="generate-id(.)"/>
|
||||
@ -390,7 +390,6 @@
|
||||
|
||||
|
||||
<xsl:output method = "xml"/>
|
||||
|
||||
<xsl:key name="style-types" match="rtf:paragraph-definition" use="@style-number"/>
|
||||
|
||||
|
||||
@ -415,13 +414,11 @@
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="rtf:page-break">
|
||||
<xsl:element name="br">
|
||||
<xsl:attribute name="style">page-break-after:always</xsl:attribute>
|
||||
</xsl:element>
|
||||
<br style = "page-break-after:always"/>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="rtf:hardline-break">
|
||||
<xsl:element name="br"/>
|
||||
<br/>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="rtf:rtf-definition|rtf:font-table|rtf:color-table|rtf:style-table|rtf:page-definition|rtf:list-table|rtf:override-table|rtf:override-list|rtf:list-text"/>
|
||||
@ -445,14 +442,21 @@
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match = "rtf:field-block">
|
||||
<xsl:apply-templates/>
|
||||
<xsl:apply-templates/>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match = "rtf:field[@type='hyperlink']">
|
||||
<xsl:element name ="a">
|
||||
<xsl:attribute name = "href">
|
||||
<xsl:if test = "not(contains(@link, '/'))">#</xsl:if>
|
||||
<xsl:value-of select = "@link"/>
|
||||
<xsl:choose>
|
||||
<xsl:when test="@argument">
|
||||
<xsl:value-of select="@argument"/>
|
||||
</xsl:when>
|
||||
<xsl:otherwise>
|
||||
<xsl:if test = "not(contains(@link, '/'))">#</xsl:if>
|
||||
<xsl:value-of select = "@link"/>
|
||||
</xsl:otherwise>
|
||||
</xsl:choose>
|
||||
</xsl:attribute>
|
||||
<xsl:apply-templates/>
|
||||
</xsl:element>
|
||||
@ -472,9 +476,7 @@
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="rtf:pict">
|
||||
<xsl:element name="img">
|
||||
<xsl:attribute name="src"><xsl:value-of select="@num" /></xsl:attribute>
|
||||
</xsl:element>
|
||||
<img src = "{@num}"/>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="*">
|
||||
|
@ -1,6 +1,3 @@
|
||||
" Project wide builtins
|
||||
let $PYFLAKES_BUILTINS = "_,dynamic_property,__,P,I,lopen,icu_lower,icu_upper,icu_title,ngettext"
|
||||
|
||||
" Include directories for C++ modules
|
||||
let g:syntastic_cpp_include_dirs = [
|
||||
\'/usr/include/python2.7',
|
||||
|
4
setup.cfg
Normal file
@ -0,0 +1,4 @@
|
||||
[flake8]
|
||||
max-line-length = 160
|
||||
builtins = _,dynamic_property,__,P,I,lopen,icu_lower,icu_upper,icu_title,ngettext
|
||||
ignore = E12,E22,E231,E301,E302,E304,E401,W391
|
@ -22,40 +22,12 @@ class Message:
|
||||
self.filename, self.lineno, self.msg = filename, lineno, msg
|
||||
|
||||
def __str__(self):
|
||||
return '%s:%s: %s'%(self.filename, self.lineno, self.msg)
|
||||
|
||||
def check_for_python_errors(code_string, filename):
|
||||
import _ast
|
||||
# First, compile into an AST and handle syntax errors.
|
||||
try:
|
||||
tree = compile(code_string, filename, "exec", _ast.PyCF_ONLY_AST)
|
||||
except (SyntaxError, IndentationError) as value:
|
||||
msg = value.args[0]
|
||||
|
||||
(lineno, offset, text) = value.lineno, value.offset, value.text
|
||||
|
||||
# If there's an encoding problem with the file, the text is None.
|
||||
if text is None:
|
||||
# Avoid using msg, since for the only known case, it contains a
|
||||
# bogus message that claims the encoding the file declared was
|
||||
# unknown.
|
||||
msg = "%s: problem decoding source" % filename
|
||||
|
||||
return [Message(filename, lineno, msg)]
|
||||
else:
|
||||
checker = __import__('pyflakes.checker').checker
|
||||
# Okay, it's syntactically valid. Now check it.
|
||||
w = checker.Checker(tree, filename)
|
||||
w.messages.sort(lambda a, b: cmp(a.lineno, b.lineno))
|
||||
return [Message(x.filename, x.lineno, x.message%x.message_args) for x in
|
||||
w.messages]
|
||||
return '%s:%s: %s' % (self.filename, self.lineno, self.msg)
|
||||
|
||||
class Check(Command):
|
||||
|
||||
description = 'Check for errors in the calibre source code'
|
||||
|
||||
BUILTINS = ['_', '__', 'dynamic_property', 'I', 'P', 'lopen', 'icu_lower',
|
||||
'icu_upper', 'icu_title', 'ngettext']
|
||||
CACHE = '.check-cache.pickle'
|
||||
|
||||
def get_files(self, cache):
|
||||
@ -65,10 +37,10 @@ class Check(Command):
|
||||
mtime = os.stat(y).st_mtime
|
||||
if cache.get(y, 0) == mtime:
|
||||
continue
|
||||
if (f.endswith('.py') and f not in ('feedparser.py',
|
||||
'pyparsing.py', 'markdown.py') and
|
||||
'prs500/driver.py' not in y):
|
||||
yield y, mtime
|
||||
if (f.endswith('.py') and f not in (
|
||||
'feedparser.py', 'markdown.py') and
|
||||
'prs500/driver.py' not in y):
|
||||
yield y, mtime
|
||||
if f.endswith('.coffee'):
|
||||
yield y, mtime
|
||||
|
||||
@ -79,25 +51,22 @@ class Check(Command):
|
||||
if f.endswith('.recipe') and cache.get(f, 0) != mtime:
|
||||
yield f, mtime
|
||||
|
||||
|
||||
def run(self, opts):
|
||||
cache = {}
|
||||
if os.path.exists(self.CACHE):
|
||||
cache = cPickle.load(open(self.CACHE, 'rb'))
|
||||
builtins = list(set_builtins(self.BUILTINS))
|
||||
for f, mtime in self.get_files(cache):
|
||||
self.info('\tChecking', f)
|
||||
errors = False
|
||||
ext = os.path.splitext(f)[1]
|
||||
if ext in {'.py', '.recipe'}:
|
||||
w = check_for_python_errors(open(f, 'rb').read(), f)
|
||||
if w:
|
||||
p = subprocess.Popen(['flake8', '--ignore=E,W', f])
|
||||
if p.wait() != 0:
|
||||
errors = True
|
||||
self.report_errors(w)
|
||||
else:
|
||||
from calibre.utils.serve_coffee import check_coffeescript
|
||||
try:
|
||||
check_coffeescript(f)
|
||||
check_coffeescript(f)
|
||||
except:
|
||||
errors = True
|
||||
if errors:
|
||||
@ -106,8 +75,6 @@ class Check(Command):
|
||||
self.j(self.SRC, '../session.vim'), '-f', f])
|
||||
raise SystemExit(1)
|
||||
cache[f] = mtime
|
||||
for x in builtins:
|
||||
delattr(__builtin__, x)
|
||||
cPickle.dump(cache, open(self.CACHE, 'wb'), -1)
|
||||
wn_path = os.path.expanduser('~/work/servers/src/calibre_servers/main')
|
||||
if os.path.exists(wn_path):
|
||||
|
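With the change above the bundled pyflakes wrapper goes away: each .py/.recipe file is handed to the external flake8 tool, which picks up the builtins and ignore list from the new setup.cfg. A minimal sketch of that kind of check:

# Sketch: delegating style/error checking to an external flake8 process.
import subprocess

def flake8_clean(path):
    # flake8 reads the builtins/ignore settings from setup.cfg in the project root.
    return subprocess.Popen(['flake8', '--ignore=E,W', path]).wait() == 0

if not flake8_clean('recipes/the_feature.recipe'):
    raise SystemExit(1)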
@ -10253,7 +10253,7 @@ msgstr ""
|
||||
|
||||
#. name for inh
|
||||
msgid "Ingush"
|
||||
msgstr "Engelsk"
|
||||
msgstr "Ingush"
|
||||
|
||||
#. name for inj
|
||||
msgid "Inga; Jungle"
|
||||
|
@ -4,7 +4,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
__appname__ = u'calibre'
|
||||
numeric_version = (0, 9, 25)
|
||||
numeric_version = (0, 9, 27)
|
||||
__version__ = u'.'.join(map(unicode, numeric_version))
|
||||
__author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"
|
||||
|
||||
|
@ -757,9 +757,10 @@ from calibre.ebooks.metadata.sources.isbndb import ISBNDB
|
||||
from calibre.ebooks.metadata.sources.overdrive import OverDrive
|
||||
from calibre.ebooks.metadata.sources.douban import Douban
|
||||
from calibre.ebooks.metadata.sources.ozon import Ozon
|
||||
# from calibre.ebooks.metadata.sources.google_images import GoogleImages
|
||||
from calibre.ebooks.metadata.sources.google_images import GoogleImages
|
||||
from calibre.ebooks.metadata.sources.big_book_search import BigBookSearch
|
||||
|
||||
plugins += [GoogleBooks, Amazon, Edelweiss, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon]
|
||||
plugins += [GoogleBooks, GoogleImages, Amazon, Edelweiss, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon, BigBookSearch]
|
||||
|
||||
# }}}
|
||||
|
||||
@ -1467,6 +1468,17 @@ class StoreKoboStore(StoreBase):
|
||||
formats = ['EPUB']
|
||||
affiliate = True
|
||||
|
||||
class StoreKoobeStore(StoreBase):
|
||||
name = 'Koobe'
|
||||
author = u'Tomasz Długosz'
|
||||
description = u'Księgarnia internetowa oferuje ebooki (książki elektroniczne) w postaci plików epub, mobi i pdf.'
|
||||
actual_plugin = 'calibre.gui2.store.stores.koobe_plugin:KoobeStore'
|
||||
|
||||
drm_free_only = True
|
||||
headquarters = 'PL'
|
||||
formats = ['EPUB', 'MOBI', 'PDF']
|
||||
affiliate = True
|
||||
|
||||
class StoreLegimiStore(StoreBase):
|
||||
name = 'Legimi'
|
||||
author = u'Tomasz Długosz'
|
||||
@ -1649,6 +1661,7 @@ class StoreWoblinkStore(StoreBase):
|
||||
|
||||
headquarters = 'PL'
|
||||
formats = ['EPUB', 'MOBI', 'PDF', 'WOBLINK']
|
||||
affiliate = True
|
||||
|
||||
class XinXiiStore(StoreBase):
|
||||
name = 'XinXii'
|
||||
@ -1686,6 +1699,7 @@ plugins += [
|
||||
StoreGoogleBooksStore,
|
||||
StoreGutenbergStore,
|
||||
StoreKoboStore,
|
||||
StoreKoobeStore,
|
||||
StoreLegimiStore,
|
||||
StoreLibreDEStore,
|
||||
StoreLitResStore,
|
||||
|
@ -91,7 +91,7 @@ def restore_plugin_state_to_default(plugin_or_name):
|
||||
config['enabled_plugins'] = ep
|
||||
|
||||
default_disabled_plugins = set([
|
||||
'Overdrive', 'Douban Books', 'OZON.ru', 'Edelweiss', 'Google Images',
|
||||
'Overdrive', 'Douban Books', 'OZON.ru', 'Edelweiss', 'Google Images', 'Big Book Search',
|
||||
])
|
||||
|
||||
def is_disabled(plugin):
|
||||
|
@ -68,4 +68,5 @@ Various things that require other things before they can be migrated:
|
||||
libraries/switching/on calibre startup.
|
||||
3. From refresh in the legacy interface: Remember to flush the composite
|
||||
column template cache.
|
||||
4. Replace the metadatabackup thread with the new implementation when using the new backend.
|
||||
'''
|
||||
|
@ -41,8 +41,7 @@ Differences in semantics from pysqlite:
|
||||
'''
|
||||
|
||||
|
||||
|
||||
class DynamicFilter(object): # {{{
|
||||
class DynamicFilter(object): # {{{
|
||||
|
||||
'No longer used, present for legacy compatibility'
|
||||
|
||||
@ -57,7 +56,7 @@ class DynamicFilter(object): # {{{
|
||||
self.ids = frozenset(ids)
|
||||
# }}}
|
||||
|
||||
class DBPrefs(dict): # {{{
|
||||
class DBPrefs(dict): # {{{
|
||||
|
||||
'Store preferences as key:value pairs in the db'
|
||||
|
||||
@ -114,9 +113,10 @@ class DBPrefs(dict): # {{{
|
||||
return default
|
||||
|
||||
def set_namespaced(self, namespace, key, val):
|
||||
if u':' in key: raise KeyError('Colons are not allowed in keys')
|
||||
if u':' in namespace: raise KeyError('Colons are not allowed in'
|
||||
' the namespace')
|
||||
if u':' in key:
|
||||
raise KeyError('Colons are not allowed in keys')
|
||||
if u':' in namespace:
|
||||
raise KeyError('Colons are not allowed in the namespace')
|
||||
key = u'namespaced:%s:%s'%(namespace, key)
|
||||
self[key] = val
|
||||
|
||||
@ -170,7 +170,8 @@ def pynocase(one, two, encoding='utf-8'):
|
||||
return cmp(one.lower(), two.lower())
|
||||
|
||||
def _author_to_author_sort(x):
|
||||
if not x: return ''
|
||||
if not x:
|
||||
return ''
|
||||
return author_to_author_sort(x.replace('|', ','))
|
||||
|
||||
def icu_collator(s1, s2):
|
||||
@ -239,9 +240,9 @@ def AumSortedConcatenate():
|
||||
|
||||
# }}}
|
||||
|
||||
class Connection(apsw.Connection): # {{{
|
||||
class Connection(apsw.Connection): # {{{
|
||||
|
||||
BUSY_TIMEOUT = 2000 # milliseconds
|
||||
BUSY_TIMEOUT = 2000 # milliseconds
|
||||
|
||||
def __init__(self, path):
|
||||
apsw.Connection.__init__(self, path)
|
||||
@ -257,7 +258,7 @@ class Connection(apsw.Connection): # {{{
|
||||
self.createscalarfunction('title_sort', title_sort, 1)
|
||||
self.createscalarfunction('author_to_author_sort',
|
||||
_author_to_author_sort, 1)
|
||||
self.createscalarfunction('uuid4', lambda : str(uuid.uuid4()),
|
||||
self.createscalarfunction('uuid4', lambda: str(uuid.uuid4()),
|
||||
0)
|
||||
|
||||
# Dummy functions for dynamically created filters
|
||||
@ -305,7 +306,8 @@ class DB(object):
|
||||
|
||||
# Initialize database {{{
|
||||
|
||||
def __init__(self, library_path, default_prefs=None, read_only=False):
|
||||
def __init__(self, library_path, default_prefs=None, read_only=False,
|
||||
restore_all_prefs=False, progress_callback=lambda x, y:True):
|
||||
try:
|
||||
if isbytestring(library_path):
|
||||
library_path = library_path.decode(filesystem_encoding)
|
||||
@ -376,23 +378,27 @@ class DB(object):
|
||||
UPDATE authors SET sort=author_to_author_sort(name) WHERE sort IS NULL;
|
||||
''')
|
||||
|
||||
self.initialize_prefs(default_prefs)
|
||||
self.initialize_prefs(default_prefs, restore_all_prefs, progress_callback)
|
||||
self.initialize_custom_columns()
|
||||
self.initialize_tables()
|
||||
|
||||
def initialize_prefs(self, default_prefs): # {{{
|
||||
def initialize_prefs(self, default_prefs, restore_all_prefs, progress_callback): # {{{
|
||||
self.prefs = DBPrefs(self)
|
||||
|
||||
if default_prefs is not None and not self._exists:
|
||||
progress_callback(None, len(default_prefs))
|
||||
# Only apply default prefs to a new database
|
||||
for key in default_prefs:
|
||||
for i, key in enumerate(default_prefs):
|
||||
# be sure that prefs not to be copied are listed below
|
||||
if key not in frozenset(['news_to_be_synced']):
|
||||
if restore_all_prefs or key not in frozenset(['news_to_be_synced']):
|
||||
self.prefs[key] = default_prefs[key]
|
||||
progress_callback(_('restored preference ') + key, i+1)
|
||||
if 'field_metadata' in default_prefs:
|
||||
fmvals = [f for f in default_prefs['field_metadata'].values()
|
||||
if f['is_custom']]
|
||||
for f in fmvals:
|
||||
progress_callback(None, len(fmvals))
|
||||
for i, f in enumerate(fmvals):
|
||||
progress_callback(_('creating custom column ') + f['label'], i)
|
||||
self.create_custom_column(f['label'], f['name'],
|
||||
f['datatype'],
|
||||
(f['is_multiple'] is not None and
|
||||
@ -421,6 +427,8 @@ class DB(object):
|
||||
('uuid', False), ('comments', True), ('id', False), ('pubdate', False),
|
||||
('last_modified', False), ('size', False), ('languages', False),
|
||||
]
|
||||
defs['virtual_libraries'] = {}
|
||||
defs['virtual_lib_on_startup'] = defs['cs_virtual_lib_on_startup'] = ''
|
||||
|
||||
# Migrate the bool tristate tweak
|
||||
defs['bools_are_tristate'] = \
|
||||
@ -469,6 +477,24 @@ class DB(object):
|
||||
except:
|
||||
pass
|
||||
|
||||
# migrate the gui_restriction preference to a virtual library
|
||||
gr_pref = self.prefs.get('gui_restriction', None)
|
||||
if gr_pref:
|
||||
virt_libs = self.prefs.get('virtual_libraries', {})
|
||||
virt_libs[gr_pref] = 'search:"' + gr_pref + '"'
|
||||
self.prefs['virtual_libraries'] = virt_libs
|
||||
self.prefs['gui_restriction'] = ''
|
||||
self.prefs['virtual_lib_on_startup'] = gr_pref
|
||||
|
||||
# migrate the cs_restriction preference to a virtual library
|
||||
gr_pref = self.prefs.get('cs_restriction', None)
|
||||
if gr_pref:
|
||||
virt_libs = self.prefs.get('virtual_libraries', {})
|
||||
virt_libs[gr_pref] = 'search:"' + gr_pref + '"'
|
||||
self.prefs['virtual_libraries'] = virt_libs
|
||||
self.prefs['cs_restriction'] = ''
|
||||
self.prefs['cs_virtual_lib_on_startup'] = gr_pref
|
||||
|
||||
# Rename any user categories with names that differ only in case
|
||||
user_cats = self.prefs.get('user_categories', [])
|
||||
catmap = {}
|
||||
@ -493,7 +519,7 @@ class DB(object):
|
||||
self.prefs.set('user_categories', user_cats)
|
||||
# }}}
|
||||
|
||||
def initialize_custom_columns(self): # {{{
|
||||
def initialize_custom_columns(self): # {{{
|
||||
with self.conn:
|
||||
# Delete previously marked custom columns
|
||||
for record in self.conn.get(
|
||||
@ -634,11 +660,11 @@ class DB(object):
|
||||
|
||||
self.custom_data_adapters = {
|
||||
'float': adapt_number,
|
||||
'int': adapt_number,
|
||||
'rating':lambda x,d : x if x is None else min(10., max(0., float(x))),
|
||||
'bool': adapt_bool,
|
||||
'int': adapt_number,
|
||||
'rating':lambda x,d: x if x is None else min(10., max(0., float(x))),
|
||||
'bool': adapt_bool,
|
||||
'comments': lambda x,d: adapt_text(x, {'is_multiple':False}),
|
||||
'datetime' : adapt_datetime,
|
||||
'datetime': adapt_datetime,
|
||||
'text':adapt_text,
|
||||
'series':adapt_text,
|
||||
'enumeration': adapt_enum
|
||||
@ -661,7 +687,7 @@ class DB(object):
|
||||
|
||||
# }}}
|
||||
|
||||
def initialize_tables(self): # {{{
|
||||
def initialize_tables(self): # {{{
|
||||
tables = self.tables = {}
|
||||
for col in ('title', 'sort', 'author_sort', 'series_index', 'comments',
|
||||
'timestamp', 'pubdate', 'uuid', 'path', 'cover',
|
||||
@ -690,11 +716,13 @@ class DB(object):
|
||||
|
||||
tables['size'] = SizeTable('size', self.field_metadata['size'].copy())
|
||||
|
||||
self.FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'timestamp':3,
|
||||
'size':4, 'rating':5, 'tags':6, 'comments':7, 'series':8,
|
||||
'publisher':9, 'series_index':10, 'sort':11, 'author_sort':12,
|
||||
'formats':13, 'path':14, 'pubdate':15, 'uuid':16, 'cover':17,
|
||||
'au_map':18, 'last_modified':19, 'identifiers':20}
|
||||
self.FIELD_MAP = {
|
||||
'id':0, 'title':1, 'authors':2, 'timestamp':3, 'size':4,
|
||||
'rating':5, 'tags':6, 'comments':7, 'series':8, 'publisher':9,
|
||||
'series_index':10, 'sort':11, 'author_sort':12, 'formats':13,
|
||||
'path':14, 'pubdate':15, 'uuid':16, 'cover':17, 'au_map':18,
|
||||
'last_modified':19, 'identifiers':20, 'languages':21,
|
||||
}
|
||||
|
||||
for k,v in self.FIELD_MAP.iteritems():
|
||||
self.field_metadata.set_field_record_index(k, v, prefer_custom=False)
|
||||
@ -740,6 +768,8 @@ class DB(object):
|
||||
self.field_metadata.set_field_record_index('ondevice', base, prefer_custom=False)
|
||||
self.FIELD_MAP['marked'] = base = base+1
|
||||
self.field_metadata.set_field_record_index('marked', base, prefer_custom=False)
|
||||
self.FIELD_MAP['series_sort'] = base = base+1
|
||||
self.field_metadata.set_field_record_index('series_sort', base, prefer_custom=False)
|
||||
|
||||
# }}}
|
||||
|
||||
@ -753,6 +783,11 @@ class DB(object):
|
||||
self._conn = Connection(self.dbpath)
|
||||
return self._conn
|
||||
|
||||
def close(self):
|
||||
if self._conn is not None:
|
||||
self._conn.close()
|
||||
del self._conn
|
||||
|
||||
@dynamic_property
|
||||
def user_version(self):
|
||||
doc = 'The user version of this database'
|
||||
@ -866,8 +901,8 @@ class DB(object):
|
||||
Read all data from the db into the python in-memory tables
|
||||
'''
|
||||
|
||||
with self.conn: # Use a single transaction, to ensure nothing modifies
|
||||
# the db while we are reading
|
||||
with self.conn: # Use a single transaction, to ensure nothing modifies
|
||||
# the db while we are reading
|
||||
for table in self.tables.itervalues():
|
||||
try:
|
||||
table.read(self)
|
||||
@ -885,7 +920,7 @@ class DB(object):
|
||||
return fmt_path
|
||||
try:
|
||||
candidates = glob.glob(os.path.join(path, '*'+fmt))
|
||||
except: # If path contains strange characters this throws an exc
|
||||
except: # If path contains strange characters this throws an exc
|
||||
candidates = []
|
||||
if fmt and candidates and os.path.exists(candidates[0]):
|
||||
shutil.copyfile(candidates[0], fmt_path)
|
||||
@ -954,7 +989,7 @@ class DB(object):
|
||||
if path != dest:
|
||||
os.rename(path, dest)
|
||||
except:
|
||||
pass # Nothing too catastrophic happened, the cases mismatch, that's all
|
||||
pass # Nothing too catastrophic happened, the cases mismatch, that's all
|
||||
else:
|
||||
windows_atomic_move.copy_path_to(path, dest)
|
||||
else:
|
||||
@ -970,7 +1005,7 @@ class DB(object):
|
||||
try:
|
||||
os.rename(path, dest)
|
||||
except:
|
||||
pass # Nothing too catastrophic happened, the cases mismatch, that's all
|
||||
pass # Nothing too catastrophic happened, the cases mismatch, that's all
|
||||
else:
|
||||
if use_hardlink:
|
||||
try:
|
||||
@ -1021,7 +1056,7 @@ class DB(object):
|
||||
if not os.path.exists(tpath):
|
||||
os.makedirs(tpath)
|
||||
|
||||
if source_ok: # Migrate existing files
|
||||
if source_ok: # Migrate existing files
|
||||
dest = os.path.join(tpath, 'cover.jpg')
|
||||
self.copy_cover_to(current_path, dest,
|
||||
windows_atomic_move=wam, use_hardlink=True)
|
||||
@ -1064,8 +1099,18 @@ class DB(object):
|
||||
os.rename(os.path.join(curpath, oldseg),
|
||||
os.path.join(curpath, newseg))
|
||||
except:
|
||||
break # Fail silently since nothing catastrophic has happened
|
||||
break # Fail silently since nothing catastrophic has happened
|
||||
curpath = os.path.join(curpath, newseg)
|
||||
|
||||
def write_backup(self, path, raw):
|
||||
path = os.path.abspath(os.path.join(self.library_path, path, 'metadata.opf'))
|
||||
with lopen(path, 'wb') as f:
|
||||
f.write(raw)
|
||||
|
||||
def read_backup(self, path):
|
||||
path = os.path.abspath(os.path.join(self.library_path, path, 'metadata.opf'))
|
||||
with lopen(path, 'rb') as f:
|
||||
return f.read()
|
||||
|
||||
# }}}
|
||||
|
||||
|
115
src/calibre/db/backup.py
Normal file
@ -0,0 +1,115 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import weakref, traceback
|
||||
from threading import Thread, Event
|
||||
|
||||
from calibre import prints
|
||||
from calibre.ebooks.metadata.opf2 import metadata_to_opf
|
||||
|
||||
class Abort(Exception):
|
||||
pass
|
||||
|
||||
class MetadataBackup(Thread):
|
||||
'''
|
||||
Continuously backup changed metadata into OPF files
|
||||
in the book directory. This class runs in its own
|
||||
thread.
|
||||
'''
|
||||
|
||||
def __init__(self, db, interval=2, scheduling_interval=0.1):
|
||||
Thread.__init__(self)
|
||||
self.daemon = True
|
||||
self._db = weakref.ref(db)
|
||||
self.stop_running = Event()
|
||||
self.interval = interval
|
||||
self.scheduling_interval = scheduling_interval
|
||||
|
||||
@property
|
||||
def db(self):
|
||||
ans = self._db()
|
||||
if ans is None:
|
||||
raise Abort()
|
||||
return ans
|
||||
|
||||
def stop(self):
|
||||
self.stop_running.set()
|
||||
|
||||
def wait(self, interval):
|
||||
if self.stop_running.wait(interval):
|
||||
raise Abort()
|
||||
|
||||
def run(self):
|
||||
while not self.stop_running.is_set():
|
||||
try:
|
||||
self.wait(self.interval)
|
||||
self.do_one()
|
||||
except Abort:
|
||||
break
|
||||
|
||||
def do_one(self):
|
||||
try:
|
||||
book_id = self.db.get_a_dirtied_book()
|
||||
if book_id is None:
|
||||
return
|
||||
except Abort:
|
||||
raise
|
||||
except:
|
||||
# Happens during interpreter shutdown
|
||||
return
|
||||
|
||||
self.wait(0)
|
||||
|
||||
try:
|
||||
mi, sequence = self.db.get_metadata_for_dump(book_id)
|
||||
except:
|
||||
prints('Failed to get backup metadata for id:', book_id, 'once')
|
||||
traceback.print_exc()
|
||||
self.wait(self.interval)
|
||||
try:
|
||||
mi, sequence = self.db.get_metadata_for_dump(book_id)
|
||||
except:
|
||||
prints('Failed to get backup metadata for id:', book_id, 'again, giving up')
|
||||
traceback.print_exc()
|
||||
return
|
||||
|
||||
if mi is None:
|
||||
self.db.clear_dirtied(book_id, sequence)
|
||||
|
||||
# Give the GUI thread a chance to do something. Python threads don't
|
||||
# have priorities, so this thread would naturally keep the processor
|
||||
# until some scheduling event happens. The wait makes such an event
|
||||
self.wait(self.scheduling_interval)
|
||||
|
||||
try:
|
||||
raw = metadata_to_opf(mi)
|
||||
except:
|
||||
prints('Failed to convert to opf for id:', book_id)
|
||||
traceback.print_exc()
|
||||
return
|
||||
|
||||
self.wait(self.scheduling_interval)
|
||||
|
||||
try:
|
||||
self.db.write_backup(book_id, raw)
|
||||
except:
|
||||
prints('Failed to write backup metadata for id:', book_id, 'once')
|
||||
self.wait(self.interval)
|
||||
try:
|
||||
self.db.write_backup(book_id, raw)
|
||||
except:
|
||||
prints('Failed to write backup metadata for id:', book_id, 'again, giving up')
|
||||
return
|
||||
|
||||
self.db.clear_dirtied(book_id, sequence)
|
||||
|
||||
def break_cycles(self):
|
||||
# Legacy compatibility
|
||||
pass
|
||||
|
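The new backup.py holds the metadata backup thread for the rewritten database backend: it keeps only a weak reference to the db, wakes up every couple of seconds, and writes one dirtied book's OPF per pass. Wiring it up is expected to look roughly like this (a sketch, not taken from the calibre sources; the db object is assumed to expose get_a_dirtied_book(), get_metadata_for_dump(), write_backup() and clear_dirtied()):

# Sketch: how a backend might drive the MetadataBackup thread above.
backup = MetadataBackup(db, interval=2)
backup.start()          # runs as a daemon thread alongside the GUI
# ... later, on shutdown ...
backup.stop()           # sets the Event; the thread exits on its next wait()
backup.join()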
@ -7,7 +7,7 @@ __license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, traceback
|
||||
import os, traceback, random
|
||||
from io import BytesIO
|
||||
from collections import defaultdict
|
||||
from functools import wraps, partial
|
||||
@ -15,7 +15,7 @@ from functools import wraps, partial
|
||||
from calibre.constants import iswindows
|
||||
from calibre.db import SPOOL_SIZE
|
||||
from calibre.db.categories import get_categories
|
||||
from calibre.db.locking import create_locks, RecordLock
|
||||
from calibre.db.locking import create_locks
|
||||
from calibre.db.errors import NoSuchFormat
|
||||
from calibre.db.fields import create_field
|
||||
from calibre.db.search import Search
|
||||
@ -23,9 +23,10 @@ from calibre.db.tables import VirtualTable
|
||||
from calibre.db.write import get_series_values
|
||||
from calibre.db.lazy import FormatMetadata, FormatsList
|
||||
from calibre.ebooks.metadata.book.base import Metadata
|
||||
from calibre.ebooks.metadata.opf2 import metadata_to_opf
|
||||
from calibre.ptempfile import (base_dir, PersistentTemporaryFile,
|
||||
SpooledTemporaryFile)
|
||||
from calibre.utils.date import now
|
||||
from calibre.utils.date import now as nowf
|
||||
from calibre.utils.icu import sort_key
|
||||
|
||||
def api(f):
|
||||
@ -57,9 +58,10 @@ class Cache(object):
|
||||
self.fields = {}
|
||||
self.composites = set()
|
||||
self.read_lock, self.write_lock = create_locks()
|
||||
self.record_lock = RecordLock(self.read_lock)
|
||||
self.format_metadata_cache = defaultdict(dict)
|
||||
self.formatter_template_cache = {}
|
||||
self.dirtied_cache = {}
|
||||
self.dirtied_sequence = 0
|
||||
self._search_api = Search(self.field_metadata.get_search_terms())
|
||||
|
||||
# Implement locking for all simple read/write API methods
|
||||
@ -78,17 +80,18 @@ class Cache(object):
|
||||
|
||||
self.initialize_dynamic()
|
||||
|
||||
@write_api
|
||||
def initialize_dynamic(self):
|
||||
# Reconstruct the user categories, putting them into field_metadata
|
||||
# Assumption is that someone else will fix them if they change.
|
||||
self.field_metadata.remove_dynamic_categories()
|
||||
for user_cat in sorted(self.pref('user_categories', {}).iterkeys(), key=sort_key):
|
||||
cat_name = '@' + user_cat # add the '@' to avoid name collision
|
||||
for user_cat in sorted(self._pref('user_categories', {}).iterkeys(), key=sort_key):
|
||||
cat_name = '@' + user_cat # add the '@' to avoid name collision
|
||||
self.field_metadata.add_user_category(label=cat_name, name=user_cat)
|
||||
|
||||
# add grouped search term user categories
|
||||
muc = frozenset(self.pref('grouped_search_make_user_categories', []))
|
||||
for cat in sorted(self.pref('grouped_search_terms', {}).iterkeys(), key=sort_key):
|
||||
muc = frozenset(self._pref('grouped_search_make_user_categories', []))
|
||||
for cat in sorted(self._pref('grouped_search_terms', {}).iterkeys(), key=sort_key):
|
||||
if cat in muc:
|
||||
# There is a chance that these can be duplicates of an existing
|
||||
# user category. Print the exception and continue.
|
||||
@ -102,15 +105,20 @@ class Cache(object):
|
||||
# self.field_metadata.add_search_category(label='search', name=_('Searches'))
|
||||
|
||||
self.field_metadata.add_grouped_search_terms(
|
||||
self.pref('grouped_search_terms', {}))
|
||||
self._pref('grouped_search_terms', {}))
|
||||
|
||||
self._search_api.change_locations(self.field_metadata.get_search_terms())
|
||||
|
||||
self.dirtied_cache = {x:i for i, (x,) in enumerate(
|
||||
self.backend.conn.execute('SELECT book FROM metadata_dirtied'))}
|
||||
if self.dirtied_cache:
|
||||
self.dirtied_sequence = max(self.dirtied_cache.itervalues())+1
|
||||
|
||||
@property
|
||||
def field_metadata(self):
|
||||
return self.backend.field_metadata
|
||||
|
||||
def _get_metadata(self, book_id, get_user_categories=True): # {{{
|
||||
def _get_metadata(self, book_id, get_user_categories=True): # {{{
|
||||
mi = Metadata(None, template_cache=self.formatter_template_cache)
|
||||
author_ids = self._field_ids_for('authors', book_id)
|
||||
aut_list = [self._author_data(i) for i in author_ids]
|
||||
@ -131,7 +139,7 @@ class Cache(object):
|
||||
mi.author_link_map = aul
|
||||
mi.comments = self._field_for('comments', book_id)
|
||||
mi.publisher = self._field_for('publisher', book_id)
|
||||
n = now()
|
||||
n = nowf()
|
||||
mi.timestamp = self._field_for('timestamp', book_id, default_value=n)
|
||||
mi.pubdate = self._field_for('pubdate', book_id, default_value=n)
|
||||
mi.uuid = self._field_for('uuid', book_id,
|
||||
@ -395,16 +403,19 @@ class Cache(object):
|
||||
'''
|
||||
if as_file:
|
||||
ret = SpooledTemporaryFile(SPOOL_SIZE)
|
||||
if not self.copy_cover_to(book_id, ret): return
|
||||
if not self.copy_cover_to(book_id, ret):
|
||||
return
|
||||
ret.seek(0)
|
||||
elif as_path:
|
||||
pt = PersistentTemporaryFile('_dbcover.jpg')
|
||||
with pt:
|
||||
if not self.copy_cover_to(book_id, pt): return
|
||||
if not self.copy_cover_to(book_id, pt):
|
||||
return
|
||||
ret = pt.name
|
||||
else:
|
||||
buf = BytesIO()
|
||||
if not self.copy_cover_to(book_id, buf): return
|
||||
if not self.copy_cover_to(book_id, buf):
|
||||
return
|
||||
ret = buf.getvalue()
|
||||
if as_image:
|
||||
from PyQt4.Qt import QImage
|
||||
@ -413,7 +424,7 @@ class Cache(object):
|
||||
ret = i
|
||||
return ret
|
||||
|
||||
@api
|
||||
@read_api
|
||||
def copy_cover_to(self, book_id, dest, use_hardlink=False):
|
||||
'''
|
||||
Copy the cover to the file like object ``dest``. Returns False
|
||||
@ -422,17 +433,15 @@ class Cache(object):
|
||||
copied to it iff the path is different from the current path (taking
|
||||
case sensitivity into account).
|
||||
'''
|
||||
with self.read_lock:
|
||||
try:
|
||||
path = self._field_for('path', book_id).replace('/', os.sep)
|
||||
except:
|
||||
return False
|
||||
try:
|
||||
path = self._field_for('path', book_id).replace('/', os.sep)
|
||||
except AttributeError:
|
||||
return False
|
||||
|
||||
with self.record_lock.lock(book_id):
|
||||
return self.backend.copy_cover_to(path, dest,
|
||||
return self.backend.copy_cover_to(path, dest,
|
||||
use_hardlink=use_hardlink)
|
||||
|
||||
@api
|
||||
@read_api
|
||||
def copy_format_to(self, book_id, fmt, dest, use_hardlink=False):
|
||||
'''
|
||||
Copy the format ``fmt`` to the file like object ``dest``. If the
|
||||
@ -441,15 +450,13 @@ class Cache(object):
the path is different from the current path (taking case sensitivity
into account).
'''
with self.read_lock:
try:
name = self.fields['formats'].format_fname(book_id, fmt)
path = self._field_for('path', book_id).replace('/', os.sep)
except:
raise NoSuchFormat('Record %d has no %s file'%(book_id, fmt))
try:
name = self.fields['formats'].format_fname(book_id, fmt)
path = self._field_for('path', book_id).replace('/', os.sep)
except (KeyError, AttributeError):
raise NoSuchFormat('Record %d has no %s file'%(book_id, fmt))

with self.record_lock.lock(book_id):
return self.backend.copy_format_to(book_id, fmt, name, path, dest,
return self.backend.copy_format_to(book_id, fmt, name, path, dest,
use_hardlink=use_hardlink)

@read_api
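The change above narrows a bare except: to (KeyError, AttributeError), so only the two expected failure modes are translated into NoSuchFormat while genuine bugs keep propagating. A small illustration of the difference, using stand-in data structures rather than the calibre classes.

# Why the narrowed except matters: unknown book id -> KeyError, missing path
# (None) -> AttributeError; anything else should not be swallowed.
import os

class NoSuchFormat(ValueError):
    pass

def locate_format(formats, paths, book_id, fmt):
    try:
        name = formats[book_id][fmt]           # KeyError if book/format unknown
        path = paths.get(book_id)
        folder = path.replace('/', os.sep)     # AttributeError if path is None
    except (KeyError, AttributeError):
        raise NoSuchFormat('Record %d has no %s file' % (book_id, fmt))
    return name, folder

# A bare "except:" here would also have hidden real bugs, e.g. a NameError
# from a typo inside the try block.
formats = {1: {'EPUB': 'Book One - Author.epub'}}
paths = {1: 'Author/Book One (1)'}
print(locate_format(formats, paths, 1, 'EPUB'))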
@ -520,16 +527,16 @@ class Cache(object):
|
||||
this means that repeated calls yield the same
|
||||
temp file (which is re-created each time)
|
||||
'''
|
||||
with self.read_lock:
|
||||
ext = ('.'+fmt.lower()) if fmt else ''
|
||||
try:
|
||||
fname = self.fields['formats'].format_fname(book_id, fmt)
|
||||
except:
|
||||
return None
|
||||
fname += ext
|
||||
|
||||
ext = ('.'+fmt.lower()) if fmt else ''
|
||||
if as_path:
|
||||
if preserve_filename:
|
||||
with self.read_lock:
|
||||
try:
|
||||
fname = self.fields['formats'].format_fname(book_id, fmt)
|
||||
except:
|
||||
return None
|
||||
fname += ext
|
||||
|
||||
bd = base_dir()
|
||||
d = os.path.join(bd, 'format_abspath')
|
||||
try:
|
||||
@ -537,36 +544,40 @@ class Cache(object):
|
||||
except:
|
||||
pass
|
||||
ret = os.path.join(d, fname)
|
||||
with self.record_lock.lock(book_id):
|
||||
try:
|
||||
self.copy_format_to(book_id, fmt, ret)
|
||||
except NoSuchFormat:
|
||||
return None
|
||||
try:
|
||||
self.copy_format_to(book_id, fmt, ret)
|
||||
except NoSuchFormat:
|
||||
return None
|
||||
else:
|
||||
with PersistentTemporaryFile(ext) as pt, self.record_lock.lock(book_id):
|
||||
with PersistentTemporaryFile(ext) as pt:
|
||||
try:
|
||||
self.copy_format_to(book_id, fmt, pt)
|
||||
except NoSuchFormat:
|
||||
return None
|
||||
ret = pt.name
|
||||
elif as_file:
|
||||
ret = SpooledTemporaryFile(SPOOL_SIZE)
|
||||
with self.record_lock.lock(book_id):
|
||||
with self.read_lock:
|
||||
try:
|
||||
self.copy_format_to(book_id, fmt, ret)
|
||||
except NoSuchFormat:
|
||||
fname = self.fields['formats'].format_fname(book_id, fmt)
|
||||
except:
|
||||
return None
|
||||
fname += ext
|
||||
|
||||
ret = SpooledTemporaryFile(SPOOL_SIZE)
|
||||
try:
|
||||
self.copy_format_to(book_id, fmt, ret)
|
||||
except NoSuchFormat:
|
||||
return None
|
||||
ret.seek(0)
|
||||
# Various bits of code try to use the name as the default
|
||||
# title when reading metadata, so set it
|
||||
ret.name = fname
|
||||
else:
|
||||
buf = BytesIO()
|
||||
with self.record_lock.lock(book_id):
|
||||
try:
|
||||
self.copy_format_to(book_id, fmt, buf)
|
||||
except NoSuchFormat:
|
||||
return None
|
||||
try:
|
||||
self.copy_format_to(book_id, fmt, buf)
|
||||
except NoSuchFormat:
|
||||
return None
|
||||
|
||||
ret = buf.getvalue()
|
||||
|
||||
@ -620,6 +631,30 @@ class Cache(object):
return get_categories(self, sort=sort, book_ids=book_ids,
icon_map=icon_map)

@write_api
def update_last_modified(self, book_ids, now=None):
if now is None:
now = nowf()
if book_ids:
f = self.fields['last_modified']
f.writer.set_books({book_id:now for book_id in book_ids}, self.backend)

@write_api
def mark_as_dirty(self, book_ids):
self._update_last_modified(book_ids)
already_dirtied = set(self.dirtied_cache).intersection(book_ids)
new_dirtied = book_ids - already_dirtied
already_dirtied = {book_id:self.dirtied_sequence+i for i, book_id in enumerate(already_dirtied)}
if already_dirtied:
self.dirtied_sequence = max(already_dirtied.itervalues()) + 1
self.dirtied_cache.update(already_dirtied)
if new_dirtied:
self.backend.conn.executemany('INSERT OR IGNORE INTO metadata_dirtied (book) VALUES (?)',
((x,) for x in new_dirtied))
new_dirtied = {book_id:self.dirtied_sequence+i for i, book_id in enumerate(new_dirtied)}
self.dirtied_sequence = max(new_dirtied.itervalues()) + 1
self.dirtied_cache.update(new_dirtied)

@write_api
def set_field(self, name, book_id_to_val_map, allow_case_change=True):
f = self.fields[name]
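mark_as_dirty() splits the incoming ids into already-dirtied and newly-dirtied books, assigns fresh sequence numbers to both groups, and only inserts the new ones into metadata_dirtied. The sequence bookkeeping in isolation, as a hedged sketch without the database write; the function name and arguments are illustrative.

# Sketch of the sequence renumbering: dirtied_cache maps book_id -> sequence.
def mark_as_dirty(book_ids, dirtied_cache, dirtied_sequence):
    book_ids = set(book_ids)
    already = set(dirtied_cache) & book_ids
    new = book_ids - already
    # Re-number books that were already dirty so they move to the back of the
    # backup queue, then do the same for the genuinely new ones.
    for group in (already, new):
        renumbered = {book_id: dirtied_sequence + i
                      for i, book_id in enumerate(group)}
        if renumbered:
            dirtied_sequence = max(renumbered.values()) + 1
            dirtied_cache.update(renumbered)
    return dirtied_cache, dirtied_sequence

cache, seq = mark_as_dirty({1, 2, 3}, {2: 0}, 1)
print(cache, seq)  # e.g. {2: 1, 1: 2, 3: 3} and 4; order of the new ids may vary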
@ -637,7 +672,7 @@ class Cache(object):
|
||||
else:
|
||||
v = sid = None
|
||||
if name.startswith('#') and sid is None:
|
||||
sid = 1.0 # The value will be set to 1.0 in the db table
|
||||
sid = 1.0 # The value will be set to 1.0 in the db table
|
||||
bimap[k] = v
|
||||
if sid is not None:
|
||||
simap[k] = sid
|
||||
@ -657,7 +692,7 @@ class Cache(object):
|
||||
if dirtied and update_path:
|
||||
self._update_path(dirtied, mark_as_dirtied=False)
|
||||
|
||||
# TODO: Mark these as dirtied so that the opf is regenerated
|
||||
self._mark_as_dirty(dirtied)
|
||||
|
||||
return dirtied
|
||||
|
||||
@ -668,13 +703,115 @@ class Cache(object):
author = self._field_for('authors', book_id, default_value=(_('Unknown'),))[0]
self.backend.update_path(book_id, title, author, self.fields['path'], self.fields['formats'])
if mark_as_dirtied:
self._mark_as_dirty(book_ids)

@read_api
def get_a_dirtied_book(self):
if self.dirtied_cache:
return random.choice(tuple(self.dirtied_cache.iterkeys()))
return None

@read_api
def get_metadata_for_dump(self, book_id):
mi = None
# get the current sequence number for this book to pass back to the
# backup thread. This will avoid double calls in the case where the
# thread has not done the work between the put and the get_metadata
sequence = self.dirtied_cache.get(book_id, None)
if sequence is not None:
try:
# While a book is being created, the path is empty. Don't bother to
# try to write the opf, because it will go to the wrong folder.
if self._field_for('path', book_id):
mi = self._get_metadata(book_id)
# Always set cover to cover.jpg. Even if cover doesn't exist,
# no harm done. This way no need to call dirtied when
# cover is set/removed
mi.cover = 'cover.jpg'
except:
# This almost certainly means that the book has been deleted while
# the backup operation sat in the queue.
pass
# TODO: Mark these books as dirtied so that metadata.opf is
# re-created
return mi, sequence

@write_api
def clear_dirtied(self, book_id, sequence):
'''
Clear the dirtied indicator for the books. This is used when fetching
metadata, creating an OPF, and writing a file are separated into steps.
The last step is clearing the indicator
'''
dc_sequence = self.dirtied_cache.get(book_id, None)
if dc_sequence is None or sequence is None or dc_sequence == sequence:
self.backend.conn.execute('DELETE FROM metadata_dirtied WHERE book=?',
(book_id,))
self.dirtied_cache.pop(book_id, None)

@write_api
def write_backup(self, book_id, raw):
try:
path = self._field_for('path', book_id).replace('/', os.sep)
except:
return

self.backend.write_backup(path, raw)

@read_api
def dirty_queue_length(self):
return len(self.dirtied_cache)

@read_api
def read_backup(self, book_id):
''' Return the OPF metadata backup for the book as a bytestring or None
if no such backup exists. '''
try:
path = self._field_for('path', book_id).replace('/', os.sep)
except:
return

try:
return self.backend.read_backup(path)
except EnvironmentError:
return None

@write_api
def dump_metadata(self, book_ids=None, remove_from_dirtied=True,
callback=None):
'''
Write metadata for each record to an individual OPF file. If callback
is not None, it is called once at the start with the number of book_ids
being processed. And once for every book_id, with arguments (book_id,
mi, ok).
'''
if book_ids is None:
book_ids = set(self.dirtied_cache)

if callback is not None:
callback(len(book_ids), True, False)

for book_id in book_ids:
if self._field_for('path', book_id) is None:
if callback is not None:
callback(book_id, None, False)
continue
mi, sequence = self._get_metadata_for_dump(book_id)
if mi is None:
if callback is not None:
callback(book_id, mi, False)
continue
try:
raw = metadata_to_opf(mi)
self._write_backup(book_id, raw)
if remove_from_dirtied:
self._clear_dirtied(book_id, sequence)
except:
pass
if callback is not None:
callback(book_id, mi, True)

# }}}

class SortKey(object): # {{{
class SortKey(object): # {{{

def __init__(self, fields, sort_keys, book_id):
self.orders = tuple(1 if f[1] else -1 for f in fields)
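Taken together, the hunk above adds the pieces of the metadata backup pipeline: pick a dirtied book, snapshot its metadata plus sequence number, serialise to OPF, write the backup, and clear the dirtied flag only if the sequence is unchanged. A hedged sketch of the overall loop that dump_metadata() appears to implement; serialize, write and clear are assumptions standing in for metadata_to_opf, backend.write_backup and clear_dirtied.

# Rough shape of the dump/backup loop, with the calibre pieces as callables.
def dump_dirtied(dirtied_cache, get_metadata, serialize, write, clear,
                 callback=None):
    book_ids = set(dirtied_cache)
    if callback is not None:
        callback(len(book_ids), True, False)     # announce the total first
    for book_id in book_ids:
        mi, sequence = get_metadata(book_id)      # snapshot + sequence number
        if mi is None:                            # book vanished meanwhile
            if callback is not None:
                callback(book_id, None, False)
            continue
        try:
            write(book_id, serialize(mi))         # e.g. an OPF bytestring
            clear(book_id, sequence)              # only if sequence unchanged
        except Exception:
            pass                                  # leave the book dirty; retry later
        if callback is not None:
            callback(book_id, mi, True)

# Example: back up one dirty book into an in-memory store.
store = {}
dump_dirtied({3: 0}, lambda bid: ({'title': 'xxx'}, 0),
             lambda mi: repr(mi).encode('utf-8'),
             lambda bid, raw: store.update({bid: raw}),
             lambda bid, seq: None)
print(store)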
@ -18,7 +18,7 @@ from calibre.utils.config_base import tweaks
|
||||
from calibre.utils.icu import sort_key
|
||||
from calibre.utils.search_query_parser import saved_searches
|
||||
|
||||
CATEGORY_SORTS = ('name', 'popularity', 'rating') # This has to be a tuple not a set
|
||||
CATEGORY_SORTS = ('name', 'popularity', 'rating') # This has to be a tuple not a set
|
||||
|
||||
class Tag(object):
|
||||
|
||||
@ -218,7 +218,7 @@ def get_categories(dbcache, sort='name', book_ids=None, icon_map=None):
|
||||
else:
|
||||
items.append(taglist[label][n])
|
||||
# else: do nothing, to not include nodes w zero counts
|
||||
cat_name = '@' + user_cat # add the '@' to avoid name collision
|
||||
cat_name = '@' + user_cat # add the '@' to avoid name collision
|
||||
# Not a problem if we accumulate entries in the icon map
|
||||
if icon_map is not None:
|
||||
icon_map[cat_name] = icon_map['user:']
|
||||
|
@ -31,7 +31,7 @@ class Field(object):
|
||||
self.table_type = self.table.table_type
|
||||
self._sort_key = (sort_key if dt in ('text', 'series', 'enumeration') else lambda x: x)
|
||||
self._default_sort_key = ''
|
||||
if dt in { 'int', 'float', 'rating' }:
|
||||
if dt in {'int', 'float', 'rating'}:
|
||||
self._default_sort_key = 0
|
||||
elif dt == 'bool':
|
||||
self._default_sort_key = None
|
||||
@ -138,7 +138,7 @@ class OneToOneField(Field):
|
||||
return self.table.book_col_map.iterkeys()
|
||||
|
||||
def sort_keys_for_books(self, get_metadata, lang_map, all_book_ids):
|
||||
return {id_ : self._sort_key(self.table.book_col_map.get(id_,
|
||||
return {id_: self._sort_key(self.table.book_col_map.get(id_,
|
||||
self._default_sort_key)) for id_ in all_book_ids}
|
||||
|
||||
def iter_searchable_values(self, get_metadata, candidates, default_value=None):
|
||||
@ -183,7 +183,7 @@ class CompositeField(OneToOneField):
|
||||
return ans
|
||||
|
||||
def sort_keys_for_books(self, get_metadata, lang_map, all_book_ids):
|
||||
return {id_ : sort_key(self.get_value_with_cache(id_, get_metadata)) for id_ in
|
||||
return {id_: sort_key(self.get_value_with_cache(id_, get_metadata)) for id_ in
|
||||
all_book_ids}
|
||||
|
||||
def iter_searchable_values(self, get_metadata, candidates, default_value=None):
|
||||
@ -245,7 +245,7 @@ class OnDeviceField(OneToOneField):
|
||||
return iter(())
|
||||
|
||||
def sort_keys_for_books(self, get_metadata, lang_map, all_book_ids):
|
||||
return {id_ : self.for_book(id_) for id_ in
|
||||
return {id_: self.for_book(id_) for id_ in
|
||||
all_book_ids}
|
||||
|
||||
def iter_searchable_values(self, get_metadata, candidates, default_value=None):
|
||||
@ -280,12 +280,12 @@ class ManyToOneField(Field):
|
||||
return self.table.id_map.iterkeys()
|
||||
|
||||
def sort_keys_for_books(self, get_metadata, lang_map, all_book_ids):
|
||||
ans = {id_ : self.table.book_col_map.get(id_, None)
|
||||
ans = {id_: self.table.book_col_map.get(id_, None)
|
||||
for id_ in all_book_ids}
|
||||
sk_map = {cid : (self._default_sort_key if cid is None else
|
||||
sk_map = {cid: (self._default_sort_key if cid is None else
|
||||
self._sort_key(self.table.id_map[cid]))
|
||||
for cid in ans.itervalues()}
|
||||
return {id_ : sk_map[cid] for id_, cid in ans.iteritems()}
|
||||
return {id_: sk_map[cid] for id_, cid in ans.iteritems()}
|
||||
|
||||
def iter_searchable_values(self, get_metadata, candidates, default_value=None):
|
||||
cbm = self.table.col_book_map
|
||||
@ -327,14 +327,14 @@ class ManyToManyField(Field):
|
||||
return self.table.id_map.iterkeys()
|
||||
|
||||
def sort_keys_for_books(self, get_metadata, lang_map, all_book_ids):
|
||||
ans = {id_ : self.table.book_col_map.get(id_, ())
|
||||
ans = {id_: self.table.book_col_map.get(id_, ())
|
||||
for id_ in all_book_ids}
|
||||
all_cids = set()
|
||||
for cids in ans.itervalues():
|
||||
all_cids = all_cids.union(set(cids))
|
||||
sk_map = {cid : self._sort_key(self.table.id_map[cid])
|
||||
sk_map = {cid: self._sort_key(self.table.id_map[cid])
|
||||
for cid in all_cids}
|
||||
return {id_ : (tuple(sk_map[cid] for cid in cids) if cids else
|
||||
return {id_: (tuple(sk_map[cid] for cid in cids) if cids else
|
||||
(self._default_sort_key,))
|
||||
for id_, cids in ans.iteritems()}
|
||||
|
||||
@ -369,9 +369,9 @@ class IdentifiersField(ManyToManyField):
|
||||
|
||||
def sort_keys_for_books(self, get_metadata, lang_map, all_book_ids):
|
||||
'Sort by identifier keys'
|
||||
ans = {id_ : self.table.book_col_map.get(id_, ())
|
||||
ans = {id_: self.table.book_col_map.get(id_, ())
|
||||
for id_ in all_book_ids}
|
||||
return {id_ : (tuple(sorted(cids.iterkeys())) if cids else
|
||||
return {id_: (tuple(sorted(cids.iterkeys())) if cids else
|
||||
(self._default_sort_key,))
|
||||
for id_, cids in ans.iteritems()}
|
||||
|
||||
@ -397,9 +397,9 @@ class AuthorsField(ManyToManyField):
|
||||
|
||||
def author_data(self, author_id):
|
||||
return {
|
||||
'name' : self.table.id_map[author_id],
|
||||
'sort' : self.table.asort_map[author_id],
|
||||
'link' : self.table.alink_map[author_id],
|
||||
'name': self.table.id_map[author_id],
|
||||
'sort': self.table.asort_map[author_id],
|
||||
'link': self.table.alink_map[author_id],
|
||||
}
|
||||
|
||||
def category_sort_value(self, item_id, book_ids, lang_map):
|
||||
@ -505,9 +505,9 @@ class TagsField(ManyToManyField):

def create_field(name, table):
cls = {
ONE_ONE : OneToOneField,
MANY_ONE : ManyToOneField,
MANY_MANY : ManyToManyField,
ONE_ONE: OneToOneField,
MANY_ONE: ManyToOneField,
MANY_MANY: ManyToManyField,
}[table.table_type]
if name == 'authors':
cls = AuthorsField
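create_field() picks the Field subclass from a small dispatch table keyed on the table type, with a special case for authors. The same idiom in miniature, with placeholder constants and classes rather than the calibre ones.

# Dispatch-table class selection, with one named override.
ONE_ONE, MANY_ONE, MANY_MANY = range(3)

class OneToOne(object): pass
class ManyToOne(object): pass
class ManyToMany(object): pass
class Authors(ManyToMany): pass

FIELD_CLASSES = {
    ONE_ONE: OneToOne,
    MANY_ONE: ManyToOne,
    MANY_MANY: ManyToMany,
}

def create_field(name, table_type):
    cls = FIELD_CLASSES[table_type]   # KeyError for an unknown table type
    if name == 'authors':             # authors need extra sort/link handling
        cls = Authors
    return cls()

print(type(create_field('tags', MANY_MANY)).__name__)     # ManyToMany
print(type(create_field('authors', MANY_MANY)).__name__)  # Authors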
src/calibre/db/legacy.py (new file, 94 lines)
@ -0,0 +1,94 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)

__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'

import os
from functools import partial

from calibre.db.backend import DB
from calibre.db.cache import Cache
from calibre.db.view import View

class LibraryDatabase(object):

''' Emulate the old LibraryDatabase2 interface '''

PATH_LIMIT = DB.PATH_LIMIT
WINDOWS_LIBRARY_PATH_LIMIT = DB.WINDOWS_LIBRARY_PATH_LIMIT

@classmethod
def exists_at(cls, path):
return path and os.path.exists(os.path.join(path, 'metadata.db'))

def __init__(self, library_path,
default_prefs=None, read_only=False, is_second_db=False,
progress_callback=lambda x, y:True, restore_all_prefs=False):

self.is_second_db = is_second_db # TODO: Use is_second_db

backend = self.backend = DB(library_path, default_prefs=default_prefs,
read_only=read_only, restore_all_prefs=restore_all_prefs,
progress_callback=progress_callback)
cache = self.new_api = Cache(backend)
cache.init()
self.data = View(cache)

self.get_property = self.data.get_property

for prop in (
'author_sort', 'authors', 'comment', 'comments',
'publisher', 'rating', 'series', 'series_index', 'tags',
'title', 'timestamp', 'uuid', 'pubdate', 'ondevice',
'metadata_last_modified', 'languages',
):
fm = {'comment':'comments', 'metadata_last_modified':
'last_modified', 'title_sort':'sort'}.get(prop, prop)
setattr(self, prop, partial(self.get_property,
loc=self.FIELD_MAP[fm]))

def close(self):
self.backend.close()

def break_cycles(self):
self.data.cache.backend = None
self.data.cache = None
self.data = self.backend = self.new_api = self.field_metadata = self.prefs = self.listeners = self.refresh_ondevice = None

# Library wide properties {{{
@property
def field_metadata(self):
return self.backend.field_metadata

@property
def user_version(self):
return self.backend.user_version

@property
def library_id(self):
return self.backend.library_id

def last_modified(self):
return self.backend.last_modified()

@property
def custom_column_num_map(self):
return self.backend.custom_column_num_map

@property
def custom_column_label_map(self):
return self.backend.custom_column_label_map

@property
def FIELD_MAP(self):
return self.backend.FIELD_MAP

def all_ids(self):
for book_id in self.data.cache.all_book_ids():
yield book_id
# }}}
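The new legacy.py wraps the new Cache/View pair behind the old LibraryDatabase2 surface by generating one accessor per column with functools.partial over get_property and FIELD_MAP. A hedged sketch of that accessor-generation trick, with a plain list of tuples standing in for the calibre backend; TinyView and TinyLegacy are illustrative names.

from functools import partial

class TinyView(object):
    FIELD_MAP = {'title': 0, 'authors': 1, 'rating': 2}

    def __init__(self, rows):
        self.rows = rows

    def get_property(self, row, loc=-1):
        return self.rows[row][loc]

class TinyLegacy(object):
    def __init__(self, rows):
        self.data = TinyView(rows)
        for prop, loc in self.data.FIELD_MAP.items():
            # Old-style API: db.title(0), db.authors(0), db.rating(0)
            setattr(self, prop, partial(self.data.get_property, loc=loc))

db = TinyLegacy([('Book One', 'Someone', 4)])
print(db.title(0), db.rating(0))  # Book One 4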
@ -39,7 +39,7 @@ def create_locks():
l = SHLock()
return RWLockWrapper(l), RWLockWrapper(l, is_shared=False)

class SHLock(object): # {{{
class SHLock(object): # {{{
'''
Shareable lock class. Used to implement the Multiple readers-single writer
paradigm. As best as I can tell, neither writer nor reader starvation
@ -191,7 +191,7 @@ class SHLock(object): # {{{
try:
return self._free_waiters.pop()
except IndexError:
return Condition(self._lock)#, verbose=True)
return Condition(self._lock)

def _return_waiter(self, waiter):
self._free_waiters.append(waiter)
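create_locks() hands back two wrappers over one SHLock: a shared one for readers and an exclusive one for writers, i.e. the multiple-readers/single-writer pattern. A compact, simplified sketch of the same shape (it is not starvation-free and is not calibre's SHLock); RWLock and wrapper are illustrative names.

import threading
from contextlib import contextmanager

class RWLock(object):
    def __init__(self):
        self._cond = threading.Condition()
        self._readers = 0
        self._writer = False

    def acquire(self, shared=True):
        with self._cond:
            if shared:
                while self._writer:           # readers wait only for a writer
                    self._cond.wait()
                self._readers += 1
            else:
                while self._writer or self._readers:  # writers need exclusivity
                    self._cond.wait()
                self._writer = True

    def release(self):
        with self._cond:
            if self._writer:
                self._writer = False
            else:
                self._readers -= 1
            self._cond.notify_all()

@contextmanager
def wrapper(lock, shared=True):
    lock.acquire(shared=shared)
    try:
        yield
    finally:
        lock.release()

lock = RWLock()
with wrapper(lock, shared=True):
    pass  # many readers may hold this at once
with wrapper(lock, shared=False):
    pass  # a writer holds it exclusively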
@ -172,7 +172,6 @@ class SchemaUpgrade(object):
|
||||
'''
|
||||
)
|
||||
|
||||
|
||||
def upgrade_version_6(self):
|
||||
'Show authors in order'
|
||||
self.conn.execute('''
|
||||
@ -337,7 +336,7 @@ class SchemaUpgrade(object):
|
||||
FROM {tn};
|
||||
|
||||
'''.format(tn=table_name, cn=column_name,
|
||||
vcn=view_column_name, scn= sort_column_name))
|
||||
vcn=view_column_name, scn=sort_column_name))
|
||||
self.conn.execute(script)
|
||||
|
||||
def create_cust_tag_browser_view(table_name, link_table_name):
|
||||
|
@ -64,7 +64,7 @@ def _match(query, value, matchkind, use_primary_find_in_search=True):
|
||||
else:
|
||||
internal_match_ok = False
|
||||
for t in value:
|
||||
try: ### ignore regexp exceptions, required because search-ahead tries before typing is finished
|
||||
try: # ignore regexp exceptions, required because search-ahead tries before typing is finished
|
||||
t = icu_lower(t)
|
||||
if (matchkind == EQUALS_MATCH):
|
||||
if internal_match_ok:
|
||||
@ -95,20 +95,20 @@ def _match(query, value, matchkind, use_primary_find_in_search=True):
return False
# }}}

class DateSearch(object): # {{{
class DateSearch(object): # {{{

def __init__(self):
self.operators = {
'=' : (1, self.eq),
'!=' : (2, self.ne),
'>' : (1, self.gt),
'>=' : (2, self.ge),
'<' : (1, self.lt),
'<=' : (2, self.le),
'=': (1, self.eq),
'!=': (2, self.ne),
'>': (1, self.gt),
'>=': (2, self.ge),
'<': (1, self.lt),
'<=': (2, self.le),
}
self.local_today = { '_today', 'today', icu_lower(_('today')) }
self.local_yesterday = { '_yesterday', 'yesterday', icu_lower(_('yesterday')) }
self.local_thismonth = { '_thismonth', 'thismonth', icu_lower(_('thismonth')) }
self.local_today = {'_today', 'today', icu_lower(_('today'))}
self.local_yesterday = {'_yesterday', 'yesterday', icu_lower(_('yesterday'))}
self.local_thismonth = {'_thismonth', 'thismonth', icu_lower(_('thismonth'))}
self.daysago_pat = re.compile(r'(%s|daysago|_daysago)$'%_('daysago'))

def eq(self, dbdate, query, field_count):
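DateSearch keeps its relational operators in a dict mapping the operator token to a pair; the integer looks like the token's length (used to strip it off the query) and the second element is the comparison method, so parsing becomes a lookup instead of an if-chain. A simplified standalone sketch of the idea for plain dates; it is not the DateSearch implementation.

from datetime import date, datetime

OPERATORS = {
    '=':  (1, lambda value, query: value == query),
    '!=': (2, lambda value, query: value != query),
    '>':  (1, lambda value, query: value > query),
    '>=': (2, lambda value, query: value >= query),
    '<':  (1, lambda value, query: value < query),
    '<=': (2, lambda value, query: value <= query),
}

def match(raw_query, value):
    # Longest operators first so '>=' wins over '>'.
    for token in sorted(OPERATORS, key=len, reverse=True):
        if raw_query.startswith(token):
            length, relop = OPERATORS[token]
            query = datetime.strptime(raw_query[length:], '%Y-%m-%d').date()
            return relop(value, query)
    raise ValueError('No operator in %r' % raw_query)

print(match('>=2013-01-01', date(2013, 3, 1)))  # True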
@ -195,13 +195,13 @@ class DateSearch(object): # {{{
|
||||
try:
|
||||
qd = now() - timedelta(int(num))
|
||||
except:
|
||||
raise ParseException(query, len(query), 'Number conversion error')
|
||||
raise ParseException(_('Number conversion error: {0}').format(num))
|
||||
field_count = 3
|
||||
else:
|
||||
try:
|
||||
qd = parse_date(query, as_utc=False)
|
||||
except:
|
||||
raise ParseException(query, len(query), 'Date conversion error')
|
||||
raise ParseException(_('Date conversion error: {0}').format(query))
|
||||
if '-' in query:
|
||||
field_count = query.count('-') + 1
|
||||
else:
|
||||
@ -216,16 +216,16 @@ class DateSearch(object): # {{{
|
||||
return matches
|
||||
# }}}
|
||||
|
||||
class NumericSearch(object): # {{{
|
||||
class NumericSearch(object): # {{{
|
||||
|
||||
def __init__(self):
|
||||
self.operators = {
|
||||
'=':( 1, lambda r, q: r == q ),
|
||||
'>':( 1, lambda r, q: r is not None and r > q ),
|
||||
'<':( 1, lambda r, q: r is not None and r < q ),
|
||||
'!=':( 2, lambda r, q: r != q ),
|
||||
'>=':( 2, lambda r, q: r is not None and r >= q ),
|
||||
'<=':( 2, lambda r, q: r is not None and r <= q )
|
||||
'=':(1, lambda r, q: r == q),
|
||||
'>':(1, lambda r, q: r is not None and r > q),
|
||||
'<':(1, lambda r, q: r is not None and r < q),
|
||||
'!=':(2, lambda r, q: r != q),
|
||||
'>=':(2, lambda r, q: r is not None and r >= q),
|
||||
'<=':(2, lambda r, q: r is not None and r <= q)
|
||||
}
|
||||
|
||||
def __call__(self, query, field_iter, location, datatype, candidates, is_many=False):
|
||||
@ -267,7 +267,7 @@ class NumericSearch(object): # {{{
|
||||
p, relop = self.operators['=']
|
||||
|
||||
cast = int
|
||||
if dt == 'rating':
|
||||
if dt == 'rating':
|
||||
cast = lambda x: 0 if x is None else int(x)
|
||||
adjust = lambda x: x/2
|
||||
elif dt in ('float', 'composite'):
|
||||
@ -285,8 +285,8 @@ class NumericSearch(object): # {{{
|
||||
try:
|
||||
q = cast(query) * mult
|
||||
except:
|
||||
raise ParseException(query, len(query),
|
||||
'Non-numeric value in query: %r'%query)
|
||||
raise ParseException(
|
||||
_('Non-numeric value in query: {0}').format(query))
|
||||
|
||||
for val, book_ids in field_iter():
|
||||
if val is None:
|
||||
@ -303,7 +303,7 @@ class NumericSearch(object): # {{{
|
||||
|
||||
# }}}
|
||||
|
||||
class BooleanSearch(object): # {{{
|
||||
class BooleanSearch(object): # {{{
|
||||
|
||||
def __init__(self):
|
||||
self.local_no = icu_lower(_('no'))
|
||||
@ -324,35 +324,35 @@ class BooleanSearch(object): # {{{
|
||||
for val, book_ids in field_iter():
|
||||
val = force_to_bool(val)
|
||||
if not bools_are_tristate:
|
||||
if val is None or not val: # item is None or set to false
|
||||
if query in { self.local_no, self.local_unchecked, 'no', '_no', 'false' }:
|
||||
if val is None or not val: # item is None or set to false
|
||||
if query in {self.local_no, self.local_unchecked, 'no', '_no', 'false'}:
|
||||
matches |= book_ids
|
||||
else: # item is explicitly set to true
|
||||
if query in { self.local_yes, self.local_checked, 'yes', '_yes', 'true' }:
|
||||
else: # item is explicitly set to true
|
||||
if query in {self.local_yes, self.local_checked, 'yes', '_yes', 'true'}:
|
||||
matches |= book_ids
|
||||
else:
|
||||
if val is None:
|
||||
if query in { self.local_empty, self.local_blank, 'empty', '_empty', 'false' }:
|
||||
if query in {self.local_empty, self.local_blank, 'empty', '_empty', 'false'}:
|
||||
matches |= book_ids
|
||||
elif not val: # is not None and false
|
||||
if query in { self.local_no, self.local_unchecked, 'no', '_no', 'true' }:
|
||||
elif not val: # is not None and false
|
||||
if query in {self.local_no, self.local_unchecked, 'no', '_no', 'true'}:
|
||||
matches |= book_ids
|
||||
else: # item is not None and true
|
||||
if query in { self.local_yes, self.local_checked, 'yes', '_yes', 'true' }:
|
||||
else: # item is not None and true
|
||||
if query in {self.local_yes, self.local_checked, 'yes', '_yes', 'true'}:
|
||||
matches |= book_ids
|
||||
return matches
|
||||
|
||||
# }}}
|
||||
|
||||
class KeyPairSearch(object): # {{{
|
||||
class KeyPairSearch(object): # {{{
|
||||
|
||||
def __call__(self, query, field_iter, candidates, use_primary_find):
|
||||
matches = set()
|
||||
if ':' in query:
|
||||
q = [q.strip() for q in query.split(':')]
|
||||
if len(q) != 2:
|
||||
raise ParseException(query, len(query),
|
||||
'Invalid query format for colon-separated search')
|
||||
raise ParseException(
|
||||
_('Invalid query format for colon-separated search: {0}').format(query))
|
||||
keyq, valq = q
|
||||
keyq_mkind, keyq = _matchkind(keyq)
|
||||
valq_mkind, valq = _matchkind(valq)
|
||||
@ -465,7 +465,8 @@ class Parser(SearchQueryParser):
|
||||
if invert:
|
||||
matches = self.all_book_ids - matches
|
||||
return matches
|
||||
raise ParseException(query, len(query), 'Recursive query group detected')
|
||||
raise ParseException(
|
||||
_('Recursive query group detected: {0}').format(query))
|
||||
|
||||
# If the user has asked to restrict searching over all field, apply
|
||||
# that restriction
|
||||
@ -547,11 +548,12 @@ class Parser(SearchQueryParser):
|
||||
field_metadata = {}
|
||||
|
||||
for x, fm in self.field_metadata.iteritems():
|
||||
if x.startswith('@'): continue
|
||||
if x.startswith('@'):
|
||||
continue
|
||||
if fm['search_terms'] and x != 'series_sort':
|
||||
all_locs.add(x)
|
||||
field_metadata[x] = fm
|
||||
if fm['datatype'] in { 'composite', 'text', 'comments', 'series', 'enumeration' }:
|
||||
if fm['datatype'] in {'composite', 'text', 'comments', 'series', 'enumeration'}:
|
||||
text_fields.add(x)
|
||||
|
||||
locations = all_locs if location == 'all' else {location}
|
||||
@ -687,8 +689,8 @@ class Search(object):
|
||||
dbcache, all_book_ids, dbcache.pref('grouped_search_terms'),
|
||||
self.date_search, self.num_search, self.bool_search,
|
||||
self.keypair_search,
|
||||
prefs[ 'limit_search_columns' ],
|
||||
prefs[ 'limit_search_columns_to' ], self.all_search_locations,
|
||||
prefs['limit_search_columns'],
|
||||
prefs['limit_search_columns_to'], self.all_search_locations,
|
||||
virtual_fields)
|
||||
|
||||
try:
|
||||
|
@ -82,7 +82,7 @@ class OneToOneTable(Table):
|
||||
self.metadata['column'], self.metadata['table'])):
|
||||
self.book_col_map[row[0]] = self.unserialize(row[1])
|
||||
|
||||
class PathTable(OneToOneTable):
|
||||
class PathTable(OneToOneTable):
|
||||
|
||||
def set_path(self, book_id, path, db):
|
||||
self.book_col_map[book_id] = path
|
||||
|
@ -16,6 +16,9 @@ rmtree = partial(shutil.rmtree, ignore_errors=True)
|
||||
|
||||
class BaseTest(unittest.TestCase):
|
||||
|
||||
longMessage = True
|
||||
maxDiff = None
|
||||
|
||||
def setUp(self):
|
||||
self.library_path = self.mkdtemp()
|
||||
self.create_db(self.library_path)
|
||||
@ -40,10 +43,10 @@ class BaseTest(unittest.TestCase):
|
||||
db.conn.close()
|
||||
return dest
|
||||
|
||||
def init_cache(self, library_path):
|
||||
def init_cache(self, library_path=None):
|
||||
from calibre.db.backend import DB
|
||||
from calibre.db.cache import Cache
|
||||
backend = DB(library_path)
|
||||
backend = DB(library_path or self.library_path)
|
||||
cache = Cache(backend)
|
||||
cache.init()
|
||||
return cache
|
||||
@ -53,9 +56,13 @@ class BaseTest(unittest.TestCase):
|
||||
atexit.register(rmtree, ans)
|
||||
return ans
|
||||
|
||||
def init_old(self, library_path):
|
||||
def init_old(self, library_path=None):
|
||||
from calibre.library.database2 import LibraryDatabase2
|
||||
return LibraryDatabase2(library_path)
|
||||
return LibraryDatabase2(library_path or self.library_path)
|
||||
|
||||
def init_legacy(self, library_path=None):
|
||||
from calibre.db.legacy import LibraryDatabase
|
||||
return LibraryDatabase(library_path or self.library_path)
|
||||
|
||||
def clone_library(self, library_path):
|
||||
if not hasattr(self, 'clone_dir'):
|
||||
@ -81,7 +88,8 @@ class BaseTest(unittest.TestCase):
|
||||
'ondevice_col', 'last_modified', 'has_cover',
|
||||
'cover_data'}.union(allfk1)
|
||||
for attr in all_keys:
|
||||
if attr == 'user_metadata': continue
|
||||
if attr == 'user_metadata':
|
||||
continue
|
||||
attr1, attr2 = getattr(mi1, attr), getattr(mi2, attr)
|
||||
if attr == 'formats':
|
||||
attr1, attr2 = map(lambda x:tuple(x) if x else (), (attr1, attr2))
|
||||
|
src/calibre/db/tests/legacy.py (new file, 66 lines)
@ -0,0 +1,66 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)

__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'

from calibre.db.tests.base import BaseTest

class LegacyTest(BaseTest):

''' Test the emulation of the legacy interface. '''

def test_library_wide_properties(self): # {{{
'Test library wide properties'
def get_props(db):
props = ('user_version', 'is_second_db', 'library_id', 'field_metadata',
'custom_column_label_map', 'custom_column_num_map')
fprops = ('last_modified', )
ans = {x:getattr(db, x) for x in props}
ans.update({x:getattr(db, x)() for x in fprops})
ans['all_ids'] = frozenset(db.all_ids())
return ans

old = self.init_old()
oldvals = get_props(old)
old.close()
del old
db = self.init_legacy()
newvals = get_props(db)
self.assertEqual(oldvals, newvals)
db.close()
# }}}

def test_get_property(self): # {{{
'Test the get_property interface for reading data'
def get_values(db):
ans = {}
for label, loc in db.FIELD_MAP.iteritems():
if isinstance(label, int):
label = '#'+db.custom_column_num_map[label]['label']
label = type('')(label)
ans[label] = tuple(db.get_property(i, index_is_id=True, loc=loc)
for i in db.all_ids())
if label in ('id', 'title', '#tags'):
with self.assertRaises(IndexError):
db.get_property(9999, loc=loc)
with self.assertRaises(IndexError):
db.get_property(9999, index_is_id=True, loc=loc)
if label in {'tags', 'formats'}:
# Order is random in the old db for these
ans[label] = tuple(set(x.split(',')) if x else x for x in ans[label])
return ans

old = self.init_old()
old_vals = get_values(old)
old.close()
old = None
db = self.init_legacy()
new_vals = get_values(db)
db.close()
self.assertEqual(old_vals, new_vals)

# }}}
@ -9,15 +9,32 @@ __docformat__ = 'restructuredtext en'

import unittest, os, argparse

try:
import init_calibre # noqa
except ImportError:
pass

def find_tests():
return unittest.defaultTestLoader.discover(os.path.dirname(os.path.abspath(__file__)), pattern='*.py')

if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('name', nargs='?', default=None, help='The name of the test to run, for e.g. writing.WritingTest.many_many_basic')
parser.add_argument('name', nargs='?', default=None,
help='The name of the test to run, for e.g. writing.WritingTest.many_many_basic or .many_many_basic for a shortcut')
args = parser.parse_args()
if args.name:
unittest.TextTestRunner(verbosity=4).run(unittest.defaultTestLoader.loadTestsFromName(args.name))
if args.name and args.name.startswith('.'):
tests = find_tests()
ans = None
try:
for suite in tests:
for test in suite._tests:
for s in test:
if s._testMethodName == args.name[1:]:
tests = s
raise StopIteration()
except StopIteration:
pass
else:
unittest.TextTestRunner(verbosity=4).run(find_tests())
tests = unittest.defaultTestLoader.loadTestsFromName(args.name) if args.name else find_tests()
unittest.TextTestRunner(verbosity=4).run(tests)
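The test-runner change lets a leading dot select a single test method by name from the discovered suites (for example .many_many_basic), falling back to loadTestsFromName or full discovery otherwise. A hedged, standalone sketch of that suite-walking shortcut; the nesting depth of discovered suites is an assumption, and find_tests/select_by_method_name are illustrative names.

import unittest

def find_tests(start_dir='.'):
    return unittest.defaultTestLoader.discover(start_dir, pattern='*.py')

def select_by_method_name(suites, name):
    # Discovered suites typically nest: top suite -> module suites -> class
    # suites -> TestCase instances.
    for suite in suites:
        for module_suite in suite:
            for test in module_suite:
                if getattr(test, '_testMethodName', None) == name:
                    return test
    return None

if __name__ == '__main__':
    tests = find_tests()
    single = select_by_method_name(tests, 'many_many_basic')
    unittest.TextTestRunner(verbosity=4).run(single or tests)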
@ -8,6 +8,7 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import datetime
|
||||
from io import BytesIO
|
||||
|
||||
from calibre.utils.date import utc_tz
|
||||
from calibre.db.tests.base import BaseTest
|
||||
@ -205,6 +206,9 @@ class ReadingTest(BaseTest):
|
||||
else:
|
||||
self.assertEqual(cdata, cache.cover(book_id, as_path=True),
|
||||
'Reading of null cover as path failed')
|
||||
buf = BytesIO()
|
||||
self.assertFalse(cache.copy_cover_to(99999, buf), 'copy_cover_to() did not return False for non-existent book_id')
|
||||
self.assertFalse(cache.copy_cover_to(3, buf), 'copy_cover_to() did not return False for non-existent cover')
|
||||
|
||||
# }}}
|
||||
|
||||
@ -305,6 +309,7 @@ class ReadingTest(BaseTest):
|
||||
def test_get_formats(self): # {{{
|
||||
'Test reading ebook formats using the format() method'
|
||||
from calibre.library.database2 import LibraryDatabase2
|
||||
from calibre.db.cache import NoSuchFormat
|
||||
old = LibraryDatabase2(self.library_path)
|
||||
ids = old.all_ids()
|
||||
lf = {i:set(old.formats(i, index_is_id=True).split(',')) if old.formats(
|
||||
@ -332,6 +337,9 @@ class ReadingTest(BaseTest):
|
||||
self.assertEqual(old, f.read(),
|
||||
'Failed to read format as path')
|
||||
|
||||
buf = BytesIO()
|
||||
self.assertRaises(NoSuchFormat, cache.copy_format_to, 99999, 'X', buf, 'copy_format_to() failed to raise an exception for non-existent book')
|
||||
self.assertRaises(NoSuchFormat, cache.copy_format_to, 1, 'X', buf, 'copy_format_to() failed to raise an exception for non-existent format')
|
||||
|
||||
# }}}
|
||||
|
||||
|
@ -9,6 +9,7 @@ __docformat__ = 'restructuredtext en'
|
||||
|
||||
from collections import namedtuple
|
||||
from functools import partial
|
||||
from io import BytesIO
|
||||
|
||||
from calibre.ebooks.metadata import author_to_author_sort
|
||||
from calibre.utils.date import UNDEFINED_DATE
|
||||
@ -16,6 +17,7 @@ from calibre.db.tests.base import BaseTest
|
||||
|
||||
class WritingTest(BaseTest):
|
||||
|
||||
# Utils {{{
|
||||
def create_getter(self, name, getter=None):
|
||||
if getter is None:
|
||||
if name.endswith('_index'):
|
||||
@ -35,7 +37,7 @@ class WritingTest(BaseTest):
|
||||
ans = lambda db:partial(getattr(db, setter), commit=True)
|
||||
return ans
|
||||
|
||||
def create_test(self, name, vals, getter=None, setter=None ):
|
||||
def create_test(self, name, vals, getter=None, setter=None):
|
||||
T = namedtuple('Test', 'name vals getter setter')
|
||||
return T(name, vals, self.create_getter(name, getter),
|
||||
self.create_setter(name, setter))
|
||||
@ -70,8 +72,9 @@ class WritingTest(BaseTest):
|
||||
'Failed setting for %s, sqlite value not the same: %r != %r'%(
|
||||
test.name, old_sqlite_res, sqlite_res))
|
||||
del db
|
||||
# }}}
|
||||
|
||||
def test_one_one(self): # {{{
|
||||
def test_one_one(self): # {{{
|
||||
'Test setting of values in one-one fields'
|
||||
tests = [self.create_test('#yesno', (True, False, 'true', 'false', None))]
|
||||
for name, getter, setter in (
|
||||
@ -112,7 +115,7 @@ class WritingTest(BaseTest):
|
||||
self.run_tests(tests)
|
||||
# }}}
|
||||
|
||||
def test_many_one_basic(self): # {{{
|
||||
def test_many_one_basic(self): # {{{
|
||||
'Test the different code paths for writing to a many-one field'
|
||||
cl = self.cloned_library
|
||||
cache = self.init_cache(cl)
|
||||
@ -199,7 +202,7 @@ class WritingTest(BaseTest):
|
||||
|
||||
# }}}
|
||||
|
||||
def test_many_many_basic(self): # {{{
|
||||
def test_many_many_basic(self): # {{{
|
||||
'Test the different code paths for writing to a many-many field'
|
||||
cl = self.cloned_library
|
||||
cache = self.init_cache(cl)
|
||||
@ -289,6 +292,67 @@ class WritingTest(BaseTest):
|
||||
ae(c.field_for('sort', 1), 'Moose, The')
|
||||
ae(c.field_for('sort', 2), 'Cat')
|
||||
|
||||
|
||||
# }}}
|
||||
|
||||
def test_dirtied(self): # {{{
|
||||
'Test the setting of the dirtied flag and the last_modified column'
|
||||
cl = self.cloned_library
|
||||
cache = self.init_cache(cl)
|
||||
ae, af, sf = self.assertEqual, self.assertFalse, cache.set_field
|
||||
# First empty dirtied
|
||||
cache.dump_metadata()
|
||||
af(cache.dirtied_cache)
|
||||
af(self.init_cache(cl).dirtied_cache)
|
||||
|
||||
prev = cache.field_for('last_modified', 3)
|
||||
import calibre.db.cache as c
|
||||
from datetime import timedelta
|
||||
utime = prev+timedelta(days=1)
|
||||
onowf = c.nowf
|
||||
c.nowf = lambda: utime
|
||||
try:
|
||||
ae(sf('title', {3:'xxx'}), set([3]))
|
||||
self.assertTrue(3 in cache.dirtied_cache)
|
||||
ae(cache.field_for('last_modified', 3), utime)
|
||||
cache.dump_metadata()
|
||||
raw = cache.read_backup(3)
|
||||
from calibre.ebooks.metadata.opf2 import OPF
|
||||
opf = OPF(BytesIO(raw))
|
||||
ae(opf.title, 'xxx')
|
||||
finally:
|
||||
c.nowf = onowf
|
||||
# }}}
|
||||
|
||||
def test_backup(self): # {{{
|
||||
'Test the automatic backup of changed metadata'
|
||||
cl = self.cloned_library
|
||||
cache = self.init_cache(cl)
|
||||
ae, af, sf, ff = self.assertEqual, self.assertFalse, cache.set_field, cache.field_for
|
||||
# First empty dirtied
|
||||
cache.dump_metadata()
|
||||
af(cache.dirtied_cache)
|
||||
from calibre.db.backup import MetadataBackup
|
||||
interval = 0.01
|
||||
mb = MetadataBackup(cache, interval=interval, scheduling_interval=0)
|
||||
mb.start()
|
||||
try:
|
||||
ae(sf('title', {1:'title1', 2:'title2', 3:'title3'}), {1,2,3})
|
||||
ae(sf('authors', {1:'author1 & author2', 2:'author1 & author2', 3:'author1 & author2'}), {1,2,3})
|
||||
count = 6
|
||||
while cache.dirty_queue_length() and count > 0:
|
||||
mb.join(interval)
|
||||
count -= 1
|
||||
af(cache.dirty_queue_length())
|
||||
finally:
|
||||
mb.stop()
|
||||
mb.join(interval)
|
||||
af(mb.is_alive())
|
||||
from calibre.ebooks.metadata.opf2 import OPF
|
||||
for book_id in (1, 2, 3):
|
||||
raw = cache.read_backup(book_id)
|
||||
opf = OPF(BytesIO(raw))
|
||||
ae(opf.title, 'title%d'%book_id)
|
||||
ae(opf.authors, ['author1', 'author2'])
|
||||
# }}}
|
||||
|
||||
|
||||
|
@ -11,6 +11,9 @@ import weakref
|
||||
from functools import partial
|
||||
from itertools import izip, imap
|
||||
|
||||
from calibre.ebooks.metadata import title_sort
|
||||
from calibre.utils.config_base import tweaks
|
||||
|
||||
def sanitize_sort_field_name(field_metadata, field):
|
||||
field = field_metadata.search_term_to_field_key(field.lower().strip())
|
||||
# translate some fields to their hidden equivalent
|
||||
@ -40,6 +43,18 @@ class TableRow(list):
|
||||
else:
|
||||
return view._field_getters[obj](self.book_id)
|
||||
|
||||
def format_is_multiple(x, sep=',', repl=None):
|
||||
if not x:
|
||||
return None
|
||||
if repl is not None:
|
||||
x = (y.replace(sep, repl) for y in x)
|
||||
return sep.join(x)
|
||||
|
||||
def format_identifiers(x):
|
||||
if not x:
|
||||
return None
|
||||
return ','.join('%s:%s'%(k, v) for k, v in x.iteritems())
|
||||
|
||||
class View(object):
|
||||
|
||||
''' A table view of the database, with rows and columns. Also supports
|
||||
@ -49,33 +64,63 @@ class View(object):
|
||||
self.cache = cache
|
||||
self.marked_ids = {}
|
||||
self.search_restriction_book_count = 0
|
||||
self.search_restriction = ''
|
||||
self.search_restriction = self.base_restriction = ''
|
||||
self.search_restriction_name = self.base_restriction_name = ''
|
||||
self._field_getters = {}
|
||||
for col, idx in cache.backend.FIELD_MAP.iteritems():
|
||||
label, fmt = col, lambda x:x
|
||||
func = {
|
||||
'id': self._get_id,
|
||||
'au_map': self.get_author_data,
|
||||
'ondevice': self.get_ondevice,
|
||||
'marked': self.get_marked,
|
||||
'series_sort':self.get_series_sort,
|
||||
}.get(col, self._get)
|
||||
if isinstance(col, int):
|
||||
label = self.cache.backend.custom_column_num_map[col]['label']
|
||||
label = (self.cache.backend.field_metadata.custom_field_prefix
|
||||
+ label)
|
||||
self._field_getters[idx] = partial(self.get, label)
|
||||
else:
|
||||
if label.endswith('_index'):
|
||||
try:
|
||||
self._field_getters[idx] = {
|
||||
'id' : self._get_id,
|
||||
'au_map' : self.get_author_data,
|
||||
'ondevice': self.get_ondevice,
|
||||
'marked' : self.get_marked,
|
||||
}[col]
|
||||
except KeyError:
|
||||
self._field_getters[idx] = partial(self.get, col)
|
||||
num = int(label.partition('_')[0])
|
||||
except ValueError:
|
||||
pass # series_index
|
||||
else:
|
||||
label = self.cache.backend.custom_column_num_map[num]['label']
|
||||
label = (self.cache.backend.field_metadata.custom_field_prefix
|
||||
+ label + '_index')
|
||||
|
||||
fm = self.field_metadata[label]
|
||||
fm
|
||||
if label == 'authors':
|
||||
fmt = partial(format_is_multiple, repl='|')
|
||||
elif label in {'tags', 'languages', 'formats'}:
|
||||
fmt = format_is_multiple
|
||||
elif label == 'cover':
|
||||
fmt = bool
|
||||
elif label == 'identifiers':
|
||||
fmt = format_identifiers
|
||||
elif fm['datatype'] == 'text' and fm['is_multiple']:
|
||||
sep = fm['is_multiple']['cache_to_list']
|
||||
if sep not in {'&','|'}:
|
||||
sep = '|'
|
||||
fmt = partial(format_is_multiple, sep=sep)
|
||||
self._field_getters[idx] = partial(func, label, fmt=fmt) if func == self._get else func
|
||||
|
||||
self._map = tuple(self.cache.all_book_ids())
|
||||
self._map_filtered = tuple(self._map)
|
||||
|
||||
def get_property(self, id_or_index, index_is_id=False, loc=-1):
|
||||
book_id = id_or_index if index_is_id else self._map_filtered[id_or_index]
|
||||
return self._field_getters[loc](book_id)
|
||||
|
||||
@property
|
||||
def field_metadata(self):
|
||||
return self.cache.field_metadata
|
||||
|
||||
def _get_id(self, idx, index_is_id=True):
|
||||
if index_is_id and idx not in self.cache.all_book_ids():
|
||||
raise IndexError('No book with id %s present'%idx)
|
||||
return idx if index_is_id else self.index_to_id(idx)
|
||||
|
||||
def __getitem__(self, row):
|
||||
@ -107,9 +152,21 @@ class View(object):
|
||||
def index_to_id(self, idx):
|
||||
return self._map_filtered[idx]
|
||||
|
||||
def get(self, field, idx, index_is_id=True, default_value=None):
|
||||
def _get(self, field, idx, index_is_id=True, default_value=None, fmt=lambda x:x):
|
||||
id_ = idx if index_is_id else self.index_to_id(idx)
|
||||
return self.cache.field_for(field, id_)
|
||||
if index_is_id and id_ not in self.cache.all_book_ids():
|
||||
raise IndexError('No book with id %s present'%idx)
|
||||
return fmt(self.cache.field_for(field, id_, default_value=default_value))
|
||||
|
||||
def get_series_sort(self, idx, index_is_id=True, default_value=''):
|
||||
book_id = idx if index_is_id else self.index_to_id(idx)
|
||||
with self.cache.read_lock:
|
||||
lang_map = self.cache.fields['languages'].book_value_map
|
||||
lang = lang_map.get(book_id, None) or None
|
||||
if lang:
|
||||
lang = lang[0]
|
||||
return title_sort(self.cache._field_for('series', book_id, default_value=''),
|
||||
order=tweaks['title_series_sorting'], lang=lang)
|
||||
|
||||
def get_ondevice(self, idx, index_is_id=True, default_value=''):
|
||||
id_ = idx if index_is_id else self.index_to_id(idx)
|
||||
@ -119,26 +176,15 @@ class View(object):
|
||||
id_ = idx if index_is_id else self.index_to_id(idx)
|
||||
return self.marked_ids.get(id_, default_value)
|
||||
|
||||
def get_author_data(self, idx, index_is_id=True, default_value=()):
|
||||
'''
|
||||
Return author data for all authors of the book identified by idx as a
|
||||
tuple of dictionaries. The dictionaries should never be empty, unless
|
||||
there is a bug somewhere. The list could be empty if idx point to an
|
||||
non existent book, or book with no authors (though again a book with no
|
||||
authors should never happen).
|
||||
|
||||
Each dictionary has the keys: name, sort, link. Link can be an empty
|
||||
string.
|
||||
|
||||
default_value is ignored, this method always returns a tuple
|
||||
'''
|
||||
def get_author_data(self, idx, index_is_id=True, default_value=None):
|
||||
id_ = idx if index_is_id else self.index_to_id(idx)
|
||||
with self.cache.read_lock:
|
||||
ids = self.cache._field_ids_for('authors', id_)
|
||||
ans = []
|
||||
for id_ in ids:
|
||||
ans.append(self.cache._author_data(id_))
|
||||
return tuple(ans)
|
||||
data = self.cache._author_data(id_)
|
||||
ans.append(':::'.join((data['name'], data['sort'], data['link'])))
|
||||
return ':#:'.join(ans) if ans else default_value
|
||||
|
||||
def multisort(self, fields=[], subsort=False, only_ids=None):
|
||||
fields = [(sanitize_sort_field_name(self.field_metadata, x), bool(y)) for x, y in fields]
|
||||
@ -168,8 +214,19 @@ class View(object):
return ans
self._map_filtered = tuple(ans)

def _build_restriction_string(self, restriction):
if self.base_restriction:
if restriction:
return u'(%s) and (%s)' % (self.base_restriction, restriction)
else:
return self.base_restriction
else:
return restriction

def search_getting_ids(self, query, search_restriction,
set_restriction_count=False):
set_restriction_count=False, use_virtual_library=True):
if use_virtual_library:
search_restriction = self._build_restriction_string(search_restriction)
q = ''
if not query or not query.strip():
q = search_restriction
@ -188,11 +245,32 @@ class View(object):
self.search_restriction_book_count = len(rv)
return rv

def get_search_restriction(self):
return self.search_restriction

def set_search_restriction(self, s):
self.search_restriction = s

def get_base_restriction(self):
return self.base_restriction

def set_base_restriction(self, s):
self.base_restriction = s

def get_base_restriction_name(self):
return self.base_restriction_name

def set_base_restriction_name(self, s):
self.base_restriction_name = s

def get_search_restriction_name(self):
return self.search_restriction_name

def set_search_restriction_name(self, s):
self.search_restriction_name = s

def search_restriction_applied(self):
return bool(self.search_restriction)
return bool(self.search_restriction) or bool(self.base_restriction)

def get_search_restriction_book_count(self):
return self.search_restriction_book_count
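The View gains a persistent base restriction (the virtual library) that is ANDed with whatever the user searches via _build_restriction_string. The composition rule, mirrored from the hunk above as a standalone function; the example queries are illustrative.

def build_restriction_string(base_restriction, restriction):
    # The virtual-library restriction always applies; a per-search restriction
    # is combined with it using "and" when both are present.
    if base_restriction:
        if restriction:
            return u'(%s) and (%s)' % (base_restriction, restriction)
        return base_restriction
    return restriction

print(build_restriction_string('tags:"=Fiction"', 'rating:>3'))
# (tags:"=Fiction") and (rating:>3)
print(build_restriction_string('', 'rating:>3'))   # rating:>3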
@ -417,7 +417,7 @@ def many_many(book_id_val_map, db, field, allow_case_change, *args):
|
||||
|
||||
# }}}
|
||||
|
||||
def identifiers(book_id_val_map, db, field, *args): # {{{
|
||||
def identifiers(book_id_val_map, db, field, *args): # {{{
|
||||
table = field.table
|
||||
updates = set()
|
||||
for book_id, identifiers in book_id_val_map.iteritems():
|
||||
|
@ -71,6 +71,7 @@ class ANDROID(USBMS):
|
||||
0x42f7 : [0x216],
|
||||
0x4365 : [0x216],
|
||||
0x4366 : [0x216],
|
||||
0x4371 : [0x216],
|
||||
},
|
||||
# Freescale
|
||||
0x15a2 : {
|
||||
@ -239,7 +240,7 @@ class ANDROID(USBMS):
|
||||
'ADVANCED', 'SGH-I727', 'USB_FLASH_DRIVER', 'ANDROID',
|
||||
'S5830I_CARD', 'MID7042', 'LINK-CREATE', '7035', 'VIEWPAD_7E',
|
||||
'NOVO7', 'MB526', '_USB#WYK7MSF8KE', 'TABLET_PC', 'F', 'MT65XX_MS',
|
||||
'ICS', 'E400', '__FILE-STOR_GADG', 'ST80208-1', 'GT-S5660M_CARD']
|
||||
'ICS', 'E400', '__FILE-STOR_GADG', 'ST80208-1', 'GT-S5660M_CARD', 'XT894']
|
||||
WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
|
||||
'FILE-STOR_GADGET', 'SGH-T959_CARD', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
|
||||
'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
|
||||
@ -250,7 +251,7 @@ class ANDROID(USBMS):
|
||||
'FILE-CD_GADGET', 'GT-I9001_CARD', 'USB_2.0', 'XT875',
|
||||
'UMS_COMPOSITE', 'PRO', '.KOBO_VOX', 'SGH-T989_CARD', 'SGH-I727',
|
||||
'USB_FLASH_DRIVER', 'ANDROID', 'MID7042', '7035', 'VIEWPAD_7E',
|
||||
'NOVO7', 'ADVANCED', 'TABLET_PC', 'F', 'E400_SD_CARD', 'ST80208-1']
|
||||
'NOVO7', 'ADVANCED', 'TABLET_PC', 'F', 'E400_SD_CARD', 'ST80208-1', 'XT894']
|
||||
|
||||
OSX_MAIN_MEM = 'Android Device Main Memory'
|
||||
|
||||
|
@ -35,11 +35,11 @@ class KOBO(USBMS):
|
||||
gui_name = 'Kobo Reader'
|
||||
description = _('Communicate with the Kobo Reader')
|
||||
author = 'Timothy Legge and David Forrester'
|
||||
version = (2, 0, 7)
|
||||
version = (2, 0, 8)
|
||||
|
||||
dbversion = 0
|
||||
fwversion = 0
|
||||
supported_dbversion = 75
|
||||
supported_dbversion = 80
|
||||
has_kepubs = False
|
||||
|
||||
supported_platforms = ['windows', 'osx', 'linux']
|
||||
@ -419,7 +419,7 @@ class KOBO(USBMS):
|
||||
# If all this succeeds we need to delete the images files via the ImageID
|
||||
return ImageID
|
||||
|
||||
def delete_images(self, ImageID):
|
||||
def delete_images(self, ImageID, book_path):
|
||||
if ImageID != None:
|
||||
path_prefix = '.kobo/images/'
|
||||
path = self._main_prefix + path_prefix + ImageID
|
||||
@ -449,7 +449,7 @@ class KOBO(USBMS):
|
||||
|
||||
ImageID = self.delete_via_sql(ContentID, ContentType)
|
||||
#print " We would now delete the Images for" + ImageID
|
||||
self.delete_images(ImageID)
|
||||
self.delete_images(ImageID, path)
|
||||
|
||||
if os.path.exists(path):
|
||||
# Delete the ebook
|
||||
@ -1204,10 +1204,16 @@ class KOBOTOUCH(KOBO):
|
||||
description = 'Communicate with the Kobo Touch, Glo and Mini firmware. Based on the existing Kobo driver by %s.' % (KOBO.author)
|
||||
# icon = I('devices/kobotouch.jpg')
|
||||
|
||||
supported_dbversion = 75
|
||||
min_supported_dbversion = 53
|
||||
min_dbversion_series = 65
|
||||
min_dbversion_archive = 71
|
||||
supported_dbversion = 80
|
||||
min_supported_dbversion = 53
|
||||
min_dbversion_series = 65
|
||||
min_dbversion_archive = 71
|
||||
min_dbversion_images_on_sdcard = 77
|
||||
|
||||
max_supported_fwversion = (2,5,1)
|
||||
min_fwversion_images_on_sdcard = (2,4,1)
|
||||
|
||||
has_kepubs = True
|
||||
|
||||
booklist_class = KTCollectionsBookList
|
||||
book_class = Book
|
||||
@ -1354,14 +1360,13 @@ class KOBOTOUCH(KOBO):
|
||||
|
||||
# Determine the firmware version
|
||||
try:
|
||||
with open(self.normalize_path(self._main_prefix + '.kobo/version'),
|
||||
'rb') as f:
|
||||
with open(self.normalize_path(self._main_prefix + '.kobo/version'), 'rb') as f:
|
||||
self.fwversion = f.readline().split(',')[2]
|
||||
self.fwversion = tuple((int(x) for x in self.fwversion.split('.')))
|
||||
except:
|
||||
self.fwversion = 'unknown'
|
||||
self.fwversion = (0,0,0)
|
||||
|
||||
|
||||
if self.fwversion != '1.0' and self.fwversion != '1.4':
|
||||
self.has_kepubs = True
|
||||
debug_print('Version of driver:', self.version, 'Has kepubs:', self.has_kepubs)
|
||||
debug_print('Version of firmware:', self.fwversion, 'Has kepubs:', self.has_kepubs)
|
||||
|
||||
@ -1466,6 +1471,7 @@ class KOBOTOUCH(KOBO):
|
||||
if show_debug:
|
||||
self.debug_index = idx
|
||||
debug_print("KoboTouch:update_booklist - idx=%d"%idx)
|
||||
debug_print("KoboTouch:update_booklist - lpath=%s"%lpath)
|
||||
debug_print('KoboTouch:update_booklist - bl[idx].device_collections=', bl[idx].device_collections)
|
||||
debug_print('KoboTouch:update_booklist - playlist_map=', playlist_map)
|
||||
debug_print('KoboTouch:update_booklist - bookshelves=', bookshelves)
|
||||
@ -1477,7 +1483,7 @@ class KOBOTOUCH(KOBO):
|
||||
bl_cache[lpath] = None
|
||||
|
||||
if ImageID is not None:
|
||||
imagename = self.imagefilename_from_imageID(ImageID)
|
||||
imagename = self.imagefilename_from_imageID(prefix, ImageID)
|
||||
if imagename is not None:
|
||||
bl[idx].thumbnail = ImageWrapper(imagename)
|
||||
if (ContentType == '6' and MimeType != 'application/x-kobo-epub+zip'):
|
||||
@ -1717,12 +1723,14 @@ class KOBOTOUCH(KOBO):
|
||||
debug_print("KoboTouch:books - end - oncard='%s'"%oncard)
|
||||
return bl
|
||||
|
||||
def imagefilename_from_imageID(self, ImageID):
|
||||
def imagefilename_from_imageID(self, prefix, ImageID):
|
||||
show_debug = self.is_debugging_title(ImageID)
|
||||
|
||||
path = self.images_path(prefix)
|
||||
path = self.normalize_path(path.replace('/', os.sep))
|
||||
|
||||
for ending, cover_options in self.cover_file_endings().items():
|
||||
fpath = self._main_prefix + '.kobo/images/' + ImageID + ending
|
||||
fpath = self.normalize_path(fpath.replace('/', os.sep))
|
||||
fpath = path + ImageID + ending
|
||||
if os.path.exists(fpath):
|
||||
if show_debug:
|
||||
debug_print("KoboTouch:imagefilename_from_imageID - have cover image fpath=%s" % (fpath))
|
||||
@ -1764,7 +1772,7 @@ class KOBOTOUCH(KOBO):
|
||||
|
||||
if not self.copying_covers():
|
||||
imageID = self.imageid_from_contentid(contentID)
|
||||
self.delete_images(imageID)
|
||||
self.delete_images(imageID, fname)
|
||||
connection.commit()
|
||||
|
||||
cursor.close()
|
||||
@ -1821,11 +1829,11 @@ class KOBOTOUCH(KOBO):
|
||||
|
||||
return imageId
|
||||
|
||||
def delete_images(self, ImageID):
|
||||
def delete_images(self, ImageID, book_path):
|
||||
debug_print("KoboTouch:delete_images - ImageID=", ImageID)
|
||||
if ImageID != None:
|
||||
path_prefix = '.kobo/images/'
|
||||
path = self._main_prefix + path_prefix + ImageID
|
||||
path = self.images_path(book_path)
|
||||
path = path + ImageID
|
||||
|
||||
for ending in self.cover_file_endings().keys():
|
||||
fpath = path + ending
|
||||
@ -1872,12 +1880,14 @@ class KOBOTOUCH(KOBO):
|
||||
def get_content_type_from_extension(self, extension):
|
||||
debug_print("KoboTouch:get_content_type_from_extension - start")
|
||||
# With new firmware, ContentType appears to be 6 for all types of sideloaded books.
|
||||
if self.fwversion.startswith('2.'):
|
||||
if self.fwversion >= (1,9,17) or extension == '.kobo' or extension == '.mobi':
|
||||
debug_print("KoboTouch:get_content_type_from_extension - V2 firmware")
|
||||
ContentType = 6
|
||||
# For older firmware, it depends on the type of file.
|
||||
elif extension == '.kobo' or extension == '.mobi':
|
||||
ContentType = 6
|
||||
else:
|
||||
debug_print("KoboTouch:get_content_type_from_extension - calling super")
|
||||
ContentType = super(KOBOTOUCH, self).get_content_type_from_extension(extension)
|
||||
ContentType = 901
|
||||
return ContentType
|
||||
|
||||
def update_device_database_collections(self, booklists, collections_attributes, oncard):
|
||||
@ -2088,8 +2098,8 @@ class KOBOTOUCH(KOBO):
|
||||
# debug_print('KoboTouch: not uploading cover')
|
||||
return
|
||||
|
||||
# Don't upload covers if book is on the SD card
|
||||
if self._card_a_prefix and path.startswith(self._card_a_prefix):
|
||||
# Only upload covers to SD card if that is supported
|
||||
if self._card_a_prefix and path.startswith(self._card_a_prefix) and not self.supports_covers_on_sdcard():
|
||||
return
|
||||
|
||||
if not opts.extra_customization[self.OPT_UPLOAD_GRAYSCALE_COVERS]:
|
||||
@ -2111,6 +2121,16 @@ class KOBOTOUCH(KOBO):
|
||||
ImageID = ImageID.replace('.', '_')
|
||||
return ImageID
|
||||
|
||||
|
||||
def images_path(self, path):
|
||||
if self._card_a_prefix and path.startswith(self._card_a_prefix) and self.supports_covers_on_sdcard():
|
||||
path_prefix = 'koboExtStorage/images/'
|
||||
path = self._card_a_prefix + path_prefix
|
||||
else:
|
||||
path_prefix = '.kobo/images/'
|
||||
path = self._main_prefix + path_prefix
|
||||
return path
|
||||
|
||||
def _upload_cover(self, path, filename, metadata, filepath, uploadgrayscale, keep_cover_aspect=False):
|
||||
from calibre.utils.magick.draw import save_cover_data_to, identify_data
|
||||
debug_print("KoboTouch:_upload_cover - filename='%s' uploadgrayscale='%s' "%(filename, uploadgrayscale))
|
||||
@ -2151,8 +2171,8 @@ class KOBOTOUCH(KOBO):
|
||||
cursor.close()
|
||||
|
||||
if ImageID != None:
|
||||
path_prefix = '.kobo/images/'
|
||||
path = self._main_prefix + path_prefix + ImageID
|
||||
path = self.images_path(path) + ImageID
|
||||
|
||||
if show_debug:
|
||||
debug_print("KoboTouch:_upload_cover - About to loop over cover endings")
|
||||
|
||||
@ -2524,6 +2544,52 @@ class KOBOTOUCH(KOBO):
|
||||
def supports_kobo_archive(self):
|
||||
return self.dbversion >= self.min_dbversion_archive
|
||||
|
||||
def supports_covers_on_sdcard(self):
|
||||
return self.dbversion >= 77 and self.fwversion >= self.min_fwversion_images_on_sdcard
|
||||
|
||||
def modify_database_check(self, function):
|
||||
# Checks to see whether the database version is supported
|
||||
# and whether the user has chosen to support the firmware version
|
||||
# debug_print("KoboTouch:modify_database_check - self.fwversion <= self.max_supported_fwversion=", self.fwversion > self.max_supported_fwversion)
|
||||
if self.dbversion > self.supported_dbversion or self.fwversion > self.max_supported_fwversion:
|
||||
# Unsupported database
|
||||
opts = self.settings()
|
||||
if not opts.extra_customization[self.OPT_SUPPORT_NEWER_FIRMWARE]:
|
||||
debug_print('The database has been upgraded past supported version')
|
||||
self.report_progress(1.0, _('Removing books from device...'))
|
||||
from calibre.devices.errors import UserFeedback
|
||||
raise UserFeedback(_("Kobo database version unsupported - See details"),
|
||||
_('Your Kobo is running an updated firmware/database version.'
|
||||
' As calibre does not know about this updated firmware,'
|
||||
' database editing is disabled, to prevent corruption.'
|
||||
' You can still send books to your Kobo with calibre, '
|
||||
' but deleting books and managing collections is disabled.'
|
||||
' If you are willing to experiment and know how to reset'
|
||||
' your Kobo to Factory defaults, you can override this'
|
||||
' check by right clicking the device icon in calibre and'
|
||||
' selecting "Configure this device" and then the '
|
||||
' "Attempt to support newer firmware" option.'
|
||||
' Doing so may require you to perform a factory reset of'
|
||||
' your Kobo.'
|
||||
),
|
||||
UserFeedback.WARN)
|
||||
|
||||
return False
|
||||
else:
|
||||
# The user chose to edit the database anyway
|
||||
return True
|
||||
else:
|
||||
# Supported database version
|
||||
return True
|
||||
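In short, database editing is only allowed when the device is within the known version range or the user has explicitly opted in. A compressed sketch of that gate (the version limits here are placeholders, not the driver's real constants):

def database_is_editable(dbversion, fwversion,
                         supported_dbversion=80,
                         max_supported_fwversion=(2, 5, 1),
                         support_newer_firmware=False):
    if dbversion > supported_dbversion or fwversion > max_supported_fwversion:
        # Device is newer than calibre knows about: only edit if the user
        # enabled the "Attempt to support newer firmware" option.
        return support_newer_firmware
    return True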
# @classmethod
|
||||
# def get_gui_name(cls):
|
||||
# if hasattr(cls, 'gui_name'):
|
||||
# return cls.gui_name
|
||||
# if hasattr(cls, '__name__'):
|
||||
# return cls.__name__
|
||||
# return cls.name
|
||||
|
||||
|
||||
@classmethod
|
||||
def is_debugging_title(cls, title):
|
||||
|
@ -4,12 +4,15 @@ __copyright__ = '2010, Fabian Grassl <fg@jusmeum.de>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import os, re, shutil
|
||||
from os.path import dirname, abspath, relpath, exists, basename
|
||||
from os.path import dirname, abspath, relpath as _relpath, exists, basename
|
||||
|
||||
from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
|
||||
from calibre import CurrentDir
|
||||
from calibre.ptempfile import PersistentTemporaryDirectory
|
||||
|
||||
def relpath(*args):
|
||||
return _relpath(*args).replace(os.sep, '/')
|
||||
|
||||
class HTMLOutput(OutputFormatPlugin):
|
||||
|
||||
name = 'HTML Output'
|
||||
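The relpath() wrapper above exists because os.path.relpath returns OS-native separators, while the hrefs written into the generated HTML must always use '/'. A small illustration (the Windows-style path is hypothetical):

import ntpath  # stand-in for os.path on Windows

rel = ntpath.relpath(r'C:\out\css\style.css', r'C:\out')  # 'css\\style.css'
href = rel.replace(ntpath.sep, '/')                       # 'css/style.css'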
@ -97,6 +97,12 @@ class TXTInput(InputFormatPlugin):
|
||||
if not ienc:
|
||||
ienc = 'utf-8'
|
||||
log.debug('No input encoding specified and could not auto detect using %s' % ienc)
|
||||
# Remove BOM from start of txt as its presence can confuse markdown
|
||||
import codecs
|
||||
for bom in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE, codecs.BOM_UTF8, codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
|
||||
if txt.startswith(bom):
|
||||
txt = txt[len(bom):]
|
||||
break
|
||||
txt = txt.decode(ienc, 'replace')
|
||||
|
||||
# Replace entities
|
||||
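A self-contained sketch of the BOM handling added above: the mark has to be stripped from the raw bytes before decoding, or it survives as an invisible leading character that trips up the markdown processor.

import codecs

def strip_bom(raw):
    # Mirrors the order of checks used in the patch above.
    for bom in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE, codecs.BOM_UTF8,
                codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
        if raw.startswith(bom):
            return raw[len(bom):]
    return raw

# strip_bom(codecs.BOM_UTF8 + b'# Chapter 1') == b'# Chapter 1'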
@ -24,7 +24,7 @@ from calibre import prints, guess_type
|
||||
from calibre.utils.cleantext import clean_ascii_chars
|
||||
from calibre.utils.config import tweaks
|
||||
|
||||
class Resource(object): # {{{
|
||||
class Resource(object): # {{{
|
||||
'''
|
||||
Represents a resource (usually a file on the filesystem or a URL pointing
|
||||
to the web. Such resources are commonly referred to in OPF files.
|
||||
@ -68,7 +68,6 @@ class Resource(object): # {{{
|
||||
self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep)))
|
||||
self.fragment = url[-1]
|
||||
|
||||
|
||||
def href(self, basedir=None):
|
||||
'''
|
||||
Return a URL pointing to this resource. If it is a file on the filesystem
|
||||
@ -90,7 +89,7 @@ class Resource(object): # {{{
|
||||
return ''+frag
|
||||
try:
|
||||
rpath = os.path.relpath(self.path, basedir)
|
||||
except ValueError: # On windows path and basedir could be on different drives
|
||||
except ValueError: # On windows path and basedir could be on different drives
|
||||
rpath = self.path
|
||||
if isinstance(rpath, unicode):
|
||||
rpath = rpath.encode('utf-8')
|
||||
@ -107,7 +106,7 @@ class Resource(object): # {{{
|
||||
|
||||
# }}}
|
||||
|
||||
class ResourceCollection(object): # {{{
|
||||
class ResourceCollection(object): # {{{
|
||||
|
||||
def __init__(self):
|
||||
self._resources = []
|
||||
@ -160,7 +159,7 @@ class ResourceCollection(object): # {{{
|
||||
|
||||
# }}}
|
||||
|
||||
class ManifestItem(Resource): # {{{
|
||||
class ManifestItem(Resource): # {{{
|
||||
|
||||
@staticmethod
|
||||
def from_opf_manifest_item(item, basedir):
|
||||
@ -180,7 +179,6 @@ class ManifestItem(Resource): # {{{
|
||||
self.mime_type = val
|
||||
return property(fget=fget, fset=fset)
|
||||
|
||||
|
||||
def __unicode__(self):
|
||||
return u'<item id="%s" href="%s" media-type="%s" />'%(self.id, self.href(), self.media_type)
|
||||
|
||||
@ -190,7 +188,6 @@ class ManifestItem(Resource): # {{{
|
||||
def __repr__(self):
|
||||
return unicode(self)
|
||||
|
||||
|
||||
def __getitem__(self, index):
|
||||
if index == 0:
|
||||
return self.href()
|
||||
@ -200,7 +197,7 @@ class ManifestItem(Resource): # {{{
|
||||
|
||||
# }}}
|
||||
|
||||
class Manifest(ResourceCollection): # {{{
|
||||
class Manifest(ResourceCollection): # {{{
|
||||
|
||||
@staticmethod
|
||||
def from_opf_manifest_element(items, dir):
|
||||
@ -245,7 +242,6 @@ class Manifest(ResourceCollection): # {{{
|
||||
ResourceCollection.__init__(self)
|
||||
self.next_id = 1
|
||||
|
||||
|
||||
def item(self, id):
|
||||
for i in self:
|
||||
if i.id == id:
|
||||
@ -269,7 +265,7 @@ class Manifest(ResourceCollection): # {{{
|
||||
|
||||
# }}}
|
||||
|
||||
class Spine(ResourceCollection): # {{{
|
||||
class Spine(ResourceCollection): # {{{
|
||||
|
||||
class Item(Resource):
|
||||
|
||||
@ -309,13 +305,10 @@ class Spine(ResourceCollection): # {{{
|
||||
continue
|
||||
return s
|
||||
|
||||
|
||||
|
||||
def __init__(self, manifest):
|
||||
ResourceCollection.__init__(self)
|
||||
self.manifest = manifest
|
||||
|
||||
|
||||
def replace(self, start, end, ids):
|
||||
'''
|
||||
Replace the items between start (inclusive) and end (not inclusive) with
|
||||
@ -345,7 +338,7 @@ class Spine(ResourceCollection): # {{{
|
||||
|
||||
# }}}
|
||||
|
||||
class Guide(ResourceCollection): # {{{
|
||||
class Guide(ResourceCollection): # {{{
|
||||
|
||||
class Reference(Resource):
|
||||
|
||||
@ -363,7 +356,6 @@ class Guide(ResourceCollection): # {{{
|
||||
ans += 'title="%s" '%self.title
|
||||
return ans + '/>'
|
||||
|
||||
|
||||
@staticmethod
|
||||
def from_opf_guide(references, base_dir=os.getcwdu()):
|
||||
coll = Guide()
|
||||
@ -484,14 +476,14 @@ def dump_dict(cats):
|
||||
return json.dumps(object_to_unicode(cats), ensure_ascii=False,
|
||||
skipkeys=True)
|
||||
|
||||
class OPF(object): # {{{
|
||||
class OPF(object): # {{{
|
||||
|
||||
MIMETYPE = 'application/oebps-package+xml'
|
||||
PARSER = etree.XMLParser(recover=True)
|
||||
NAMESPACES = {
|
||||
None : "http://www.idpf.org/2007/opf",
|
||||
'dc' : "http://purl.org/dc/elements/1.1/",
|
||||
'opf' : "http://www.idpf.org/2007/opf",
|
||||
None: "http://www.idpf.org/2007/opf",
|
||||
'dc': "http://purl.org/dc/elements/1.1/",
|
||||
'opf': "http://www.idpf.org/2007/opf",
|
||||
}
|
||||
META = '{%s}meta' % NAMESPACES['opf']
|
||||
xpn = NAMESPACES.copy()
|
||||
@ -501,9 +493,10 @@ class OPF(object): # {{{
|
||||
CONTENT = XPath('self::*[re:match(name(), "meta$", "i")]/@content')
|
||||
TEXT = XPath('string()')
|
||||
|
||||
|
||||
metadata_path = XPath('descendant::*[re:match(name(), "metadata", "i")]')
|
||||
metadata_elem_path = XPath('descendant::*[re:match(name(), concat($name, "$"), "i") or (re:match(name(), "meta$", "i") and re:match(@name, concat("^calibre:", $name, "$"), "i"))]')
|
||||
metadata_elem_path = XPath(
|
||||
'descendant::*[re:match(name(), concat($name, "$"), "i") or (re:match(name(), "meta$", "i") '
|
||||
'and re:match(@name, concat("^calibre:", $name, "$"), "i"))]')
|
||||
title_path = XPath('descendant::*[re:match(name(), "title", "i")]')
|
||||
authors_path = XPath('descendant::*[re:match(name(), "creator", "i") and (@role="aut" or @opf:role="aut" or (not(@role) and not(@opf:role)))]')
|
||||
bkp_path = XPath('descendant::*[re:match(name(), "contributor", "i") and (@role="bkp" or @opf:role="bkp")]')
|
||||
@ -640,7 +633,8 @@ class OPF(object): # {{{
|
||||
if 'toc' in item.href().lower():
|
||||
toc = item.path
|
||||
|
||||
if toc is None: return
|
||||
if toc is None:
|
||||
return
|
||||
self.toc = TOC(base_path=self.base_dir)
|
||||
is_ncx = getattr(self, 'manifest', None) is not None and \
|
||||
self.manifest.type_for_id(toc) is not None and \
|
||||
@ -976,7 +970,6 @@ class OPF(object): # {{{
|
||||
|
||||
return property(fget=fget, fset=fset)
|
||||
|
||||
|
||||
@dynamic_property
|
||||
def language(self):
|
||||
|
||||
@ -990,7 +983,6 @@ class OPF(object): # {{{
|
||||
|
||||
return property(fget=fget, fset=fset)
|
||||
|
||||
|
||||
@dynamic_property
|
||||
def languages(self):
|
||||
|
||||
@ -1015,7 +1007,6 @@ class OPF(object): # {{{
|
||||
|
||||
return property(fget=fget, fset=fset)
|
||||
|
||||
|
||||
@dynamic_property
|
||||
def book_producer(self):
|
||||
|
||||
@ -1196,7 +1187,6 @@ class OPFCreator(Metadata):
|
||||
if self.cover:
|
||||
self.guide.set_cover(self.cover)
|
||||
|
||||
|
||||
def create_manifest(self, entries):
|
||||
'''
|
||||
Create <manifest>
|
||||
@ -1615,9 +1605,9 @@ def test_user_metadata():
|
||||
from cStringIO import StringIO
|
||||
mi = Metadata('Test title', ['test author1', 'test author2'])
|
||||
um = {
|
||||
'#myseries': { '#value#': u'test series\xe4', 'datatype':'text',
|
||||
'#myseries': {'#value#': u'test series\xe4', 'datatype':'text',
|
||||
'is_multiple': None, 'name': u'My Series'},
|
||||
'#myseries_index': { '#value#': 2.45, 'datatype': 'float',
|
||||
'#myseries_index': {'#value#': 2.45, 'datatype': 'float',
|
||||
'is_multiple': None},
|
||||
'#mytags': {'#value#':['t1','t2','t3'], 'datatype':'text',
|
||||
'is_multiple': '|', 'name': u'My Tags'}
|
||||
|
@ -21,7 +21,7 @@ from calibre.ebooks.metadata.book.base import Metadata
|
||||
from calibre.utils.date import parse_only_date
|
||||
from calibre.utils.localization import canonicalize_lang
|
||||
|
||||
class Worker(Thread): # Get details {{{
|
||||
class Worker(Thread): # Get details {{{
|
||||
|
||||
'''
|
||||
Get book details from amazons book page in a separate thread
|
||||
@ -43,12 +43,12 @@ class Worker(Thread): # Get details {{{
|
||||
|
||||
months = {
|
||||
'de': {
|
||||
1 : ['jän'],
|
||||
2 : ['februar'],
|
||||
3 : ['märz'],
|
||||
5 : ['mai'],
|
||||
6 : ['juni'],
|
||||
7 : ['juli'],
|
||||
1: ['jän'],
|
||||
2: ['februar'],
|
||||
3: ['märz'],
|
||||
5: ['mai'],
|
||||
6: ['juni'],
|
||||
7: ['juli'],
|
||||
10: ['okt'],
|
||||
12: ['dez']
|
||||
},
|
||||
@ -132,7 +132,7 @@ class Worker(Thread): # Get details {{{
|
||||
text()="Détails sur le produit" or \
|
||||
text()="Detalles del producto" or \
|
||||
text()="Detalhes do produto" or \
|
||||
text()="登録情報"]/../div[@class="content"]
|
||||
starts-with(text(), "登録情報")]/../div[@class="content"]
|
||||
'''
|
||||
# Editor: is for Spanish
|
||||
self.publisher_xpath = '''
|
||||
@ -235,6 +235,12 @@ class Worker(Thread): # Get details {{{
|
||||
msg = 'Failed to parse amazon details page: %r'%self.url
|
||||
self.log.exception(msg)
|
||||
return
|
||||
if self.domain == 'jp':
|
||||
for a in root.xpath('//a[@href]'):
|
||||
if 'black-curtain-redirect.html' in a.get('href'):
|
||||
self.url = 'http://amazon.co.jp'+a.get('href')
|
||||
self.log('Black curtain redirect found, following')
|
||||
return self.get_details()
|
||||
|
|
||||
if errmsg:
|
||||
@ -252,8 +258,8 @@ class Worker(Thread): # Get details {{{
|
||||
self.log.exception('Error parsing asin for url: %r'%self.url)
|
||||
asin = None
|
||||
if self.testing:
|
||||
import tempfile
|
||||
with tempfile.NamedTemporaryFile(prefix=asin + '_',
|
||||
import tempfile, uuid
|
||||
with tempfile.NamedTemporaryFile(prefix=(asin or str(uuid.uuid4()))+ '_',
|
||||
suffix='.html', delete=False) as f:
|
||||
f.write(raw)
|
||||
print ('Downloaded html for', asin, 'saved in', f.name)
|
||||
@ -270,7 +276,6 @@ class Worker(Thread): # Get details {{{
|
||||
self.log.exception('Error parsing authors for url: %r'%self.url)
|
||||
authors = []
|
||||
|
||||
|
||||
if not title or not authors or not asin:
|
||||
self.log.error('Could not find title/authors/asin for %r'%self.url)
|
||||
self.log.error('ASIN: %r Title: %r Authors: %r'%(asin, title,
|
||||
@ -425,7 +430,6 @@ class Worker(Thread): # Get details {{{
|
||||
desc = re.sub(r'(?s)<!--.*?-->', '', desc)
|
||||
return sanitize_comments_html(desc)
|
||||
|
||||
|
||||
def parse_comments(self, root):
|
||||
ans = ''
|
||||
desc = root.xpath('//div[@id="ps-content"]/div[@class="content"]')
|
||||
@ -499,7 +503,7 @@ class Worker(Thread): # Get details {{{
|
||||
def parse_language(self, pd):
|
||||
for x in reversed(pd.xpath(self.language_xpath)):
|
||||
if x.tail:
|
||||
raw = x.tail.strip()
|
||||
raw = x.tail.strip().partition(',')[0].strip()
|
||||
ans = self.lang_map.get(raw, None)
|
||||
if ans:
|
||||
return ans
|
||||
@ -522,13 +526,13 @@ class Amazon(Source):
|
||||
|
||||
AMAZON_DOMAINS = {
|
||||
'com': _('US'),
|
||||
'fr' : _('France'),
|
||||
'de' : _('Germany'),
|
||||
'uk' : _('UK'),
|
||||
'it' : _('Italy'),
|
||||
'jp' : _('Japan'),
|
||||
'es' : _('Spain'),
|
||||
'br' : _('Brazil'),
|
||||
'fr': _('France'),
|
||||
'de': _('Germany'),
|
||||
'uk': _('UK'),
|
||||
'it': _('Italy'),
|
||||
'jp': _('Japan'),
|
||||
'es': _('Spain'),
|
||||
'br': _('Brazil'),
|
||||
}
|
||||
|
||||
options = (
|
||||
@ -586,7 +590,7 @@ class Amazon(Source):
|
||||
return domain, val
|
||||
return None, None
|
||||
|
||||
def get_book_url(self, identifiers): # {{{
|
||||
def get_book_url(self, identifiers): # {{{
|
||||
domain, asin = self.get_domain_and_asin(identifiers)
|
||||
if domain and asin:
|
||||
url = None
|
||||
@ -631,8 +635,7 @@ class Amazon(Source):
|
||||
mi.tags = list(map(fixcase, mi.tags))
|
||||
mi.isbn = check_isbn(mi.isbn)
|
||||
|
||||
|
||||
def create_query(self, log, title=None, authors=None, identifiers={}, # {{{
|
||||
def create_query(self, log, title=None, authors=None, identifiers={}, # {{{
|
||||
domain=None):
|
||||
if domain is None:
|
||||
domain = self.domain
|
||||
@ -642,8 +645,8 @@ class Amazon(Source):
|
||||
domain = idomain
|
||||
|
||||
# See the amazon detailed search page to get all options
|
||||
q = { 'search-alias' : 'aps',
|
||||
'unfiltered' : '1',
|
||||
q = {'search-alias': 'aps',
|
||||
'unfiltered': '1',
|
||||
}
|
||||
|
||||
if domain == 'com':
|
||||
@ -698,7 +701,7 @@ class Amazon(Source):
|
||||
|
||||
# }}}
|
||||
|
||||
def get_cached_cover_url(self, identifiers): # {{{
|
||||
def get_cached_cover_url(self, identifiers): # {{{
|
||||
url = None
|
||||
domain, asin = self.get_domain_and_asin(identifiers)
|
||||
if asin is None:
|
||||
@ -711,14 +714,17 @@ class Amazon(Source):
|
||||
return url
|
||||
# }}}
|
||||
|
||||
def parse_results_page(self, root): # {{{
|
||||
def parse_results_page(self, root): # {{{
|
||||
from lxml.html import tostring
|
||||
|
||||
matches = []
|
||||
|
||||
def title_ok(title):
|
||||
title = title.lower()
|
||||
for x in ('bulk pack', '[audiobook]', '[audio cd]'):
|
||||
bad = ['bulk pack', '[audiobook]', '[audio cd]']
|
||||
if self.domain == 'com':
|
||||
bad.append('(spanish edition)')
|
||||
for x in bad:
|
||||
if x in title:
|
||||
return False
|
||||
return True
|
||||
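The revised title_ok() above builds its blacklist dynamically so that '(spanish edition)' is only filtered out on amazon.com. Equivalent standalone logic:

def title_ok(title, domain='com'):
    title = title.lower()
    bad = ['bulk pack', '[audiobook]', '[audio cd]']
    if domain == 'com':
        bad.append('(spanish edition)')
    return not any(x in title for x in bad)

# title_ok('11/22/63: A Novel') -> True
# title_ok('11/22/63 (Spanish Edition)') -> False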
@ -745,13 +751,12 @@ class Amazon(Source):
|
||||
matches.append(a.get('href'))
|
||||
break
|
||||
|
||||
|
||||
# Keep only the top 5 matches as the matches are sorted by relevance by
|
||||
# Amazon so lower matches are not likely to be very relevant
|
||||
return matches[:5]
|
||||
# }}}
|
||||
|
||||
def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
|
||||
def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
|
||||
identifiers={}, timeout=30):
|
||||
'''
|
||||
Note this method will retry without identifiers automatically if no
|
||||
@ -789,7 +794,6 @@ class Amazon(Source):
|
||||
log.exception(msg)
|
||||
return as_unicode(msg)
|
||||
|
||||
|
||||
raw = clean_ascii_chars(xml_to_unicode(raw,
|
||||
strip_encoding_pats=True, resolve_entities=True)[0])
|
||||
|
||||
@ -819,7 +823,6 @@ class Amazon(Source):
|
||||
# The error is almost always a not found error
|
||||
found = False
|
||||
|
||||
|
||||
if found:
|
||||
matches = self.parse_results_page(root)
|
||||
|
||||
@ -857,7 +860,7 @@ class Amazon(Source):
|
||||
return None
|
||||
# }}}
|
||||
|
||||
def download_cover(self, log, result_queue, abort, # {{{
|
||||
def download_cover(self, log, result_queue, abort, # {{{
|
||||
title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
|
||||
cached_url = self.get_cached_cover_url(identifiers)
|
||||
if cached_url is None:
|
||||
@ -894,39 +897,44 @@ class Amazon(Source):
|
||||
log.exception('Failed to download cover from:', cached_url)
|
||||
# }}}
|
||||
|
||||
if __name__ == '__main__': # tests {{{
|
||||
if __name__ == '__main__': # tests {{{
|
||||
# To run these test use: calibre-debug -e
|
||||
# src/calibre/ebooks/metadata/sources/amazon.py
|
||||
from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
|
||||
isbn_test, title_test, authors_test, comments_test, series_test)
|
||||
com_tests = [ # {{{
|
||||
com_tests = [ # {{{
|
||||
|
||||
( # + in title and uses id="main-image" for cover
|
||||
( # Has a spanish edition
|
||||
{'title':'11/22/63'},
|
||||
[title_test('11/22/63: A Novel', exact=True), authors_test(['Stephen King']),]
|
||||
),
|
||||
|
||||
( # + in title and uses id="main-image" for cover
|
||||
{'title':'C++ Concurrency in Action'},
|
||||
[title_test('C++ Concurrency in Action: Practical Multithreading',
|
||||
exact=True),
|
||||
]
|
||||
),
|
||||
|
||||
( # Series
|
||||
( # Series
|
||||
{'identifiers':{'amazon':'0756407117'}},
|
||||
[title_test(
|
||||
"Throne of the Crescent Moon"
|
||||
, exact=True), series_test('Crescent Moon Kingdoms', 1),
|
||||
"Throne of the Crescent Moon",
|
||||
exact=True), series_test('Crescent Moon Kingdoms', 1),
|
||||
comments_test('Makhslood'),
|
||||
]
|
||||
),
|
||||
|
||||
( # Different comments markup, using Book Description section
|
||||
( # Different comments markup, using Book Description section
|
||||
{'identifiers':{'amazon':'0982514506'}},
|
||||
[title_test(
|
||||
"Griffin's Destiny: Book Three: The Griffin's Daughter Trilogy"
|
||||
, exact=True),
|
||||
"Griffin's Destiny: Book Three: The Griffin's Daughter Trilogy",
|
||||
exact=True),
|
||||
comments_test('Jelena'), comments_test('Leslie'),
|
||||
]
|
||||
),
|
||||
|
||||
( # # in title
|
||||
( # # in title
|
||||
{'title':'Expert C# 2008 Business Objects',
|
||||
'authors':['Lhotka']},
|
||||
[title_test('Expert C# 2008 Business Objects', exact=True),
|
||||
@ -942,13 +950,13 @@ if __name__ == '__main__': # tests {{{
|
||||
|
||||
),
|
||||
|
||||
( # Sophisticated comment formatting
|
||||
( # Sophisticated comment formatting
|
||||
{'identifiers':{'isbn': '9781416580829'}},
|
||||
[title_test('Angels & Demons - Movie Tie-In: A Novel',
|
||||
exact=True), authors_test(['Dan Brown'])]
|
||||
),
|
||||
|
||||
( # No specific problems
|
||||
( # No specific problems
|
||||
{'identifiers':{'isbn': '0743273567'}},
|
||||
[title_test('The great gatsby', exact=True),
|
||||
authors_test(['F. Scott Fitzgerald'])]
|
||||
@ -961,9 +969,9 @@ if __name__ == '__main__': # tests {{{
|
||||
|
||||
),
|
||||
|
||||
] # }}}
|
||||
] # }}}
|
||||
|
||||
de_tests = [ # {{{
|
||||
de_tests = [ # {{{
|
||||
(
|
||||
{'identifiers':{'isbn': '3548283519'}},
|
||||
[title_test('Wer Wind Sät: Der Fünfte Fall Für Bodenstein Und Kirchhoff',
|
||||
@ -971,9 +979,9 @@ if __name__ == '__main__': # tests {{{
|
||||
]
|
||||
|
||||
),
|
||||
] # }}}
|
||||
] # }}}
|
||||
|
||||
it_tests = [ # {{{
|
||||
it_tests = [ # {{{
|
||||
(
|
||||
{'identifiers':{'isbn': '8838922195'}},
|
||||
[title_test('La briscola in cinque',
|
||||
@ -981,9 +989,9 @@ if __name__ == '__main__': # tests {{{
|
||||
]
|
||||
|
||||
),
|
||||
] # }}}
|
||||
] # }}}
|
||||
|
||||
fr_tests = [ # {{{
|
||||
fr_tests = [ # {{{
|
||||
(
|
||||
{'identifiers':{'isbn': '2221116798'}},
|
||||
[title_test('L\'étrange voyage de Monsieur Daldry',
|
||||
@ -991,9 +999,9 @@ if __name__ == '__main__': # tests {{{
|
||||
]
|
||||
|
||||
),
|
||||
] # }}}
|
||||
] # }}}
|
||||
|
||||
es_tests = [ # {{{
|
||||
es_tests = [ # {{{
|
||||
(
|
||||
{'identifiers':{'isbn': '8483460831'}},
|
||||
[title_test('Tiempos Interesantes',
|
||||
@ -1001,23 +1009,28 @@ if __name__ == '__main__': # tests {{{
|
||||
]
|
||||
|
||||
),
|
||||
] # }}}
|
||||
] # }}}
|
||||
|
||||
jp_tests = [ # {{{
|
||||
( # isbn -> title, authors
|
||||
{'identifiers':{'isbn': '9784101302720' }},
|
||||
jp_tests = [ # {{{
|
||||
( # Adult filtering test
|
||||
{'identifiers':{'isbn':'4799500066'}},
|
||||
[title_test(u'Bitch Trap'),]
|
||||
),
|
||||
|
||||
( # isbn -> title, authors
|
||||
{'identifiers':{'isbn': '9784101302720'}},
|
||||
[title_test(u'精霊の守り人',
|
||||
exact=True), authors_test([u'上橋 菜穂子'])
|
||||
]
|
||||
),
|
||||
( # title, authors -> isbn (will use Shift_JIS encoding in query.)
|
||||
( # title, authors -> isbn (will use Shift_JIS encoding in query.)
|
||||
{'title': u'考えない練習',
|
||||
'authors': [u'小池 龍之介']},
|
||||
[isbn_test('9784093881067'), ]
|
||||
),
|
||||
] # }}}
|
||||
] # }}}
|
||||
|
||||
br_tests = [ # {{{
|
||||
br_tests = [ # {{{
|
||||
(
|
||||
{'title':'Guerra dos Tronos'},
|
||||
[title_test('A Guerra dos Tronos - As Crônicas de Gelo e Fogo',
|
||||
@ -1025,7 +1038,7 @@ if __name__ == '__main__': # tests {{{
|
||||
]
|
||||
|
||||
),
|
||||
] # }}}
|
||||
] # }}}
|
||||
|
||||
def do_test(domain, start=0, stop=None):
|
||||
tests = globals().get(domain+'_tests')
|
||||
|
@ -31,7 +31,7 @@ msprefs.defaults['find_first_edition_date'] = False
|
||||
# Google covers are often poor quality (scans/errors) but they have high
|
||||
# resolution, so they trump covers from better sources. So make sure they
|
||||
# are only used if no other covers are found.
|
||||
msprefs.defaults['cover_priorities'] = {'Google':2, 'Google Images':2}
|
||||
msprefs.defaults['cover_priorities'] = {'Google':2, 'Google Images':2, 'Big Book Search':2}
|
||||
|
||||
def create_log(ostream=None):
|
||||
from calibre.utils.logging import ThreadSafeLog, FileStream
|
||||
@ -429,6 +429,40 @@ class Source(Plugin):
|
||||
mi.tags = list(map(fixcase, mi.tags))
|
||||
mi.isbn = check_isbn(mi.isbn)
|
||||
|
||||
def download_multiple_covers(self, title, authors, urls, get_best_cover, timeout, result_queue, abort, log, prefs_name='max_covers'):
|
||||
if not urls:
|
||||
log('No images found for, title: %r and authors: %r'%(title, authors))
|
||||
return
|
||||
from threading import Thread
|
||||
import time
|
||||
if prefs_name:
|
||||
urls = urls[:self.prefs[prefs_name]]
|
||||
if get_best_cover:
|
||||
urls = urls[:1]
|
||||
log('Downloading %d covers'%len(urls))
|
||||
workers = [Thread(target=self.download_image, args=(u, timeout, log, result_queue)) for u in urls]
|
||||
for w in workers:
|
||||
w.daemon = True
|
||||
w.start()
|
||||
alive = True
|
||||
start_time = time.time()
|
||||
while alive and not abort.is_set() and time.time() - start_time < timeout:
|
||||
alive = False
|
||||
for w in workers:
|
||||
if w.is_alive():
|
||||
alive = True
|
||||
break
|
||||
abort.wait(0.1)
|
||||
|
||||
def download_image(self, url, timeout, log, result_queue):
|
||||
try:
|
||||
ans = self.browser.open_novisit(url, timeout=timeout).read()
|
||||
result_queue.put((self, ans))
|
||||
log('Downloaded cover from: %s'%url)
|
||||
except Exception:
|
||||
self.log.exception('Failed to download cover from: %r'%url)
|
||||
|
||||
|
||||
# }}}
|
||||
|
||||
# Metadata API {{{
|
||||
|
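The new Source.download_multiple_covers() above fans out one daemon thread per cover URL and then polls until all workers finish, the abort event is set, or the timeout expires. A condensed sketch of that pattern (names here are illustrative, not calibre API):

import time
from Queue import Queue           # Python 2, matching the code above
from threading import Thread, Event

def fetch_all(urls, fetch, timeout=30, abort=None):
    abort = abort or Event()
    results = Queue()
    workers = [Thread(target=fetch, args=(u, results)) for u in urls]
    for w in workers:
        w.daemon = True
        w.start()
    start = time.time()
    while any(w.is_alive() for w in workers):
        if abort.is_set() or time.time() - start > timeout:
            break
        abort.wait(0.1)           # brief sleep between polls
    return results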
src/calibre/ebooks/metadata/sources/big_book_search.py (new file, 58 lines)
@ -0,0 +1,58 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from calibre.ebooks.metadata.sources.base import Source, Option
|
||||
|
||||
def get_urls(br, tokens):
|
||||
from urllib import quote_plus
|
||||
from mechanize import Request
|
||||
from lxml import html
|
||||
escaped = [quote_plus(x.encode('utf-8')) for x in tokens if x and x.strip()]
|
||||
q = b'+'.join(escaped)
|
||||
url = 'http://bigbooksearch.com/books/'+q
|
||||
br.open(url).read()
|
||||
req = Request('http://bigbooksearch.com/query.php?SearchIndex=books&Keywords=%s&ItemPage=1'%q)
|
||||
req.add_header('X-Requested-With', 'XMLHttpRequest')
|
||||
req.add_header('Referer', url)
|
||||
raw = br.open(req).read()
|
||||
root = html.fromstring(raw.decode('utf-8'))
|
||||
urls = [i.get('src') for i in root.xpath('//img[@src]')]
|
||||
return urls
|
||||
|
||||
class BigBookSearch(Source):
|
||||
|
||||
name = 'Big Book Search'
|
||||
description = _('Downloads multiple book covers from Amazon. Useful to find alternate covers.')
|
||||
capabilities = frozenset(['cover'])
|
||||
config_help_message = _('Configure the Big Book Search plugin')
|
||||
can_get_multiple_covers = True
|
||||
options = (Option('max_covers', 'number', 5, _('Maximum number of covers to get'),
|
||||
_('The maximum number of covers to process from the search result')),
|
||||
)
|
||||
supports_gzip_transfer_encoding = True
|
||||
|
||||
def download_cover(self, log, result_queue, abort,
|
||||
title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
|
||||
if not title:
|
||||
return
|
||||
br = self.browser
|
||||
tokens = tuple(self.get_title_tokens(title)) + tuple(self.get_author_tokens(authors))
|
||||
urls = get_urls(br, tokens)
|
||||
self.download_multiple_covers(title, authors, urls, get_best_cover, timeout, result_queue, abort, log)
|
||||
|
||||
def test():
|
||||
from calibre import browser
|
||||
import pprint
|
||||
br = browser()
|
||||
urls = get_urls(br, ['consider', 'phlebas', 'banks'])
|
||||
pprint.pprint(urls)
|
||||
|
||||
if __name__ == '__main__':
|
||||
test()
|
||||
|
@ -18,12 +18,13 @@ from calibre.utils.magick.draw import Image, save_cover_data_to
|
||||
|
||||
class Worker(Thread):
|
||||
|
||||
def __init__(self, plugin, abort, title, authors, identifiers, timeout, rq):
|
||||
def __init__(self, plugin, abort, title, authors, identifiers, timeout, rq, get_best_cover=False):
|
||||
Thread.__init__(self)
|
||||
self.daemon = True
|
||||
|
||||
self.plugin = plugin
|
||||
self.abort = abort
|
||||
self.get_best_cover = get_best_cover
|
||||
self.buf = BytesIO()
|
||||
self.log = create_log(self.buf)
|
||||
self.title, self.authors, self.identifiers = (title, authors,
|
||||
@ -37,7 +38,7 @@ class Worker(Thread):
|
||||
try:
|
||||
if self.plugin.can_get_multiple_covers:
|
||||
self.plugin.download_cover(self.log, self.rq, self.abort,
|
||||
title=self.title, authors=self.authors, get_best_cover=True,
|
||||
title=self.title, authors=self.authors, get_best_cover=self.get_best_cover,
|
||||
identifiers=self.identifiers, timeout=self.timeout)
|
||||
else:
|
||||
self.plugin.download_cover(self.log, self.rq, self.abort,
|
||||
@ -72,7 +73,7 @@ def process_result(log, result):
|
||||
return (plugin, width, height, fmt, data)
|
||||
|
||||
def run_download(log, results, abort,
|
||||
title=None, authors=None, identifiers={}, timeout=30):
|
||||
title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
|
||||
'''
|
||||
Run the cover download, putting results into the queue :param:`results`.
|
||||
|
||||
@ -89,7 +90,7 @@ def run_download(log, results, abort,
|
||||
plugins = [p for p in metadata_plugins(['cover']) if p.is_configured()]
|
||||
|
||||
rq = Queue()
|
||||
workers = [Worker(p, abort, title, authors, identifiers, timeout, rq) for p
|
||||
workers = [Worker(p, abort, title, authors, identifiers, timeout, rq, get_best_cover=get_best_cover) for p
|
||||
in plugins]
|
||||
for w in workers:
|
||||
w.start()
|
||||
@ -163,7 +164,7 @@ def download_cover(log,
|
||||
abort = Event()
|
||||
|
||||
run_download(log, rq, abort, title=title, authors=authors,
|
||||
identifiers=identifiers, timeout=timeout)
|
||||
identifiers=identifiers, timeout=timeout, get_best_cover=True)
|
||||
|
||||
results = []
|
||||
|
||||
|
@ -106,6 +106,8 @@ class Worker(Thread): # {{{
|
||||
parts = pub.partition(':')[0::2]
|
||||
pub = parts[1] or parts[0]
|
||||
try:
|
||||
if ', Ship Date:' in pub:
|
||||
pub = pub.partition(', Ship Date:')[0]
|
||||
q = parse_only_date(pub, assume_utc=True)
|
||||
if q.year != UNDEFINED_DATE:
|
||||
mi.pubdate = q
|
||||
|
@ -39,39 +39,11 @@ class GoogleImages(Source):
|
||||
title=None, authors=None, identifiers={}, timeout=30, get_best_cover=False):
|
||||
if not title:
|
||||
return
|
||||
from threading import Thread
|
||||
import time
|
||||
timeout = max(60, timeout) # Needs at least a minute
|
||||
title = ' '.join(self.get_title_tokens(title))
|
||||
author = ' '.join(self.get_author_tokens(authors))
|
||||
urls = self.get_image_urls(title, author, log, abort, timeout)
|
||||
if not urls:
|
||||
log('No images found in Google for, title: %r and authors: %r'%(title, author))
|
||||
return
|
||||
urls = urls[:self.prefs['max_covers']]
|
||||
if get_best_cover:
|
||||
urls = urls[:1]
|
||||
workers = [Thread(target=self.download_image, args=(url, timeout, log, result_queue)) for url in urls]
|
||||
for w in workers:
|
||||
w.daemon = True
|
||||
w.start()
|
||||
alive = True
|
||||
start_time = time.time()
|
||||
while alive and not abort.is_set() and time.time() - start_time < timeout:
|
||||
alive = False
|
||||
for w in workers:
|
||||
if w.is_alive():
|
||||
alive = True
|
||||
break
|
||||
abort.wait(0.1)
|
||||
|
||||
def download_image(self, url, timeout, log, result_queue):
|
||||
try:
|
||||
ans = self.browser.open_novisit(url, timeout=timeout).read()
|
||||
result_queue.put((self, ans))
|
||||
log('Downloaded cover from: %s'%url)
|
||||
except Exception:
|
||||
self.log.exception('Failed to download cover from: %r'%url)
|
||||
self.download_multiple_covers(title, authors, urls, get_best_cover, timeout, result_queue, abort, log)
|
||||
|
||||
def get_image_urls(self, title, author, log, abort, timeout):
|
||||
from calibre.utils.ipc.simple_worker import fork_job, WorkerError
|
||||
|
@ -51,9 +51,11 @@ def reverse_tag_iter(block):
|
||||
end = len(block)
|
||||
while True:
|
||||
pgt = block.rfind(b'>', 0, end)
|
||||
if pgt == -1: break
|
||||
if pgt == -1:
|
||||
break
|
||||
plt = block.rfind(b'<', 0, pgt)
|
||||
if plt == -1: break
|
||||
if plt == -1:
|
||||
break
|
||||
yield block[plt:pgt+1]
|
||||
end = plt
|
||||
|
@ -231,12 +233,12 @@ class Mobi8Reader(object):
|
||||
flowpart = self.flows[j]
|
||||
nstr = '%04d' % j
|
||||
m = svg_tag_pattern.search(flowpart)
|
||||
if m != None:
|
||||
if m is not None:
|
||||
# svg
|
||||
typ = 'svg'
|
||||
start = m.start()
|
||||
m2 = image_tag_pattern.search(flowpart)
|
||||
if m2 != None:
|
||||
if m2 is not None:
|
||||
format = 'inline'
|
||||
dir = None
|
||||
fname = None
|
||||
@ -320,7 +322,7 @@ class Mobi8Reader(object):
|
||||
if len(pos_fid) != 2:
|
||||
continue
|
||||
except TypeError:
|
||||
continue # thumbnailstandard record, ignore it
|
||||
continue # thumbnailstandard record, ignore it
|
||||
linktgt, idtext = self.get_id_tag_by_pos_fid(*pos_fid)
|
||||
if idtext:
|
||||
linktgt += b'#' + idtext
|
||||
@ -389,7 +391,7 @@ class Mobi8Reader(object):
|
||||
href = None
|
||||
if typ in {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n',
|
||||
b'RESC', b'BOUN', b'FDST', b'DATP', b'AUDI', b'VIDE'}:
|
||||
pass # Ignore these records
|
||||
pass # Ignore these records
|
||||
elif typ == b'FONT':
|
||||
font = read_font_record(data)
|
||||
href = "fonts/%05d.%s" % (fname_idx, font['ext'])
|
||||
@ -406,7 +408,11 @@ class Mobi8Reader(object):
|
||||
else:
|
||||
imgtype = what(None, data)
|
||||
if imgtype is None:
|
||||
imgtype = 'unknown'
|
||||
from calibre.utils.magick.draw import identify_data
|
||||
try:
|
||||
imgtype = identify_data(data)[2]
|
||||
except Exception:
|
||||
imgtype = 'unknown'
|
||||
href = 'images/%05d.%s'%(fname_idx, imgtype)
|
||||
with open(href.replace('/', os.sep), 'wb') as f:
|
||||
f.write(data)
|
||||
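A sketch of the fallback added above: when the imghdr-style what() check cannot identify the image, the code now asks ImageMagick via identify_data() before giving up, and 'unknown' is only used if both fail. Standalone form, assuming the same imports as the patch:

def image_extension(data):
    from imghdr import what
    from calibre.utils.magick.draw import identify_data
    ext = what(None, data)
    if ext is None:
        try:
            ext = identify_data(data)[2]   # (width, height, fmt)
        except Exception:
            ext = 'unknown'
    return ext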
@ -19,7 +19,7 @@ from calibre.ebooks.mobi.reader.mobi8 import Mobi8Reader
|
||||
from calibre.ebooks.conversion.plumber import Plumber, create_oebbook
|
||||
from calibre.customize.ui import (plugin_for_input_format,
|
||||
plugin_for_output_format)
|
||||
from calibre.utils.ipc.simple_worker import fork_job
|
||||
from calibre.utils.ipc.simple_worker import fork_job
|
||||
|
||||
class BadFormat(ValueError):
|
||||
pass
|
||||
@ -72,7 +72,8 @@ def explode(path, dest, question=lambda x:True):
|
||||
dest), no_output=True)['result']
|
||||
|
||||
def set_cover(oeb):
|
||||
if 'cover' not in oeb.guide or oeb.metadata['cover']: return
|
||||
if 'cover' not in oeb.guide or oeb.metadata['cover']:
|
||||
return
|
||||
cover = oeb.guide['cover']
|
||||
if cover.href in oeb.manifest.hrefs:
|
||||
item = oeb.manifest.hrefs[cover.href]
|
||||
@ -95,8 +96,9 @@ def rebuild(src_dir, dest_path):
|
||||
if not opf:
|
||||
raise ValueError('No OPF file found in %s'%src_dir)
|
||||
opf = opf[0]
|
||||
# For debugging, uncomment the following line
|
||||
# def fork_job(a, b, args=None, no_output=True): do_rebuild(*args)
|
||||
# For debugging, uncomment the following two lines
|
||||
# def fork_job(a, b, args=None, no_output=True):
|
||||
# do_rebuild(*args)
|
||||
fork_job('calibre.ebooks.mobi.tweak', 'do_rebuild', args=(opf, dest_path),
|
||||
no_output=True)
|
||||
|
||||
|
@ -69,7 +69,8 @@ class Resources(object):
|
||||
cover_href = item.href
|
||||
|
||||
for item in self.oeb.manifest.values():
|
||||
if item.media_type not in OEB_RASTER_IMAGES: continue
|
||||
if item.media_type not in OEB_RASTER_IMAGES:
|
||||
continue
|
||||
try:
|
||||
data = self.process_image(item.data)
|
||||
except:
|
||||
@ -116,8 +117,8 @@ class Resources(object):
|
||||
Add any images that were created after the call to add_resources()
|
||||
'''
|
||||
for item in self.oeb.manifest.values():
|
||||
if (item.media_type not in OEB_RASTER_IMAGES or item.href in
|
||||
self.item_map): continue
|
||||
if (item.media_type not in OEB_RASTER_IMAGES or item.href in self.item_map):
|
||||
continue
|
||||
try:
|
||||
data = self.process_image(item.data)
|
||||
except:
|
||||
|
@ -270,7 +270,7 @@ BINARY_MIME = 'application/octet-stream'
|
||||
|
||||
XHTML_CSS_NAMESPACE = u'@namespace "%s";\n' % XHTML_NS
|
||||
|
||||
OEB_STYLES = set([CSS_MIME, OEB_CSS_MIME, 'text/x-oeb-css'])
|
||||
OEB_STYLES = set([CSS_MIME, OEB_CSS_MIME, 'text/x-oeb-css', 'xhtml/css'])
|
||||
OEB_DOCS = set([XHTML_MIME, 'text/html', OEB_DOC_MIME,
|
||||
'text/x-oeb-document'])
|
||||
OEB_RASTER_IMAGES = set([GIF_MIME, JPEG_MIME, PNG_MIME])
|
||||
|
@ -30,7 +30,7 @@ from calibre.ebooks.oeb.base import (
|
||||
from calibre.ebooks.oeb.polish.errors import InvalidBook, DRMError
|
||||
from calibre.ebooks.oeb.parse_utils import NotHTML, parse_html, RECOVER_PARSER
|
||||
from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile
|
||||
from calibre.utils.ipc.simple_worker import fork_job, WorkerError
|
||||
from calibre.utils.ipc.simple_worker import fork_job, WorkerError
|
||||
from calibre.utils.logging import default_log
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
|
||||
@ -77,7 +77,7 @@ class Container(object):
|
||||
|
||||
# Map of relative paths with '/' separators from root of unzipped ePub
|
||||
# to absolute paths on filesystem with os-specific separators
|
||||
opfpath = os.path.abspath(opfpath)
|
||||
opfpath = os.path.abspath(os.path.realpath(opfpath))
|
||||
for dirpath, _dirnames, filenames in os.walk(self.root):
|
||||
for f in filenames:
|
||||
path = join(dirpath, f)
|
||||
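The realpath() additions above (here and for the temporary container directories in later hunks) make sure every stored path has symlinks resolved, so relative-path computations between the OPF location and the files found by os.walk() compare like with like. Illustrative only, with a made-up symlinked temp directory:

import os

opf = '/tmp/container/content.opf'   # hypothetical; /tmp may be a symlink
resolved = os.path.abspath(os.path.realpath(opf))
# On a system where /tmp -> /private/tmp, resolved becomes
# '/private/tmp/container/content.opf'; without realpath the prefixes differ.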
@ -406,8 +406,9 @@ class Container(object):
|
||||
child.get('content', '').strip() in {'{}', ''}):
|
||||
remove.add(child)
|
||||
except AttributeError:
|
||||
continue # Happens for XML comments
|
||||
for child in remove: mdata.remove(child)
|
||||
continue # Happens for XML comments
|
||||
for child in remove:
|
||||
mdata.remove(child)
|
||||
if len(mdata) > 0:
|
||||
mdata[-1].tail = '\n '
|
||||
|
||||
@ -473,17 +474,17 @@ class EpubContainer(Container):
|
||||
book_type = 'epub'
|
||||
|
||||
META_INF = {
|
||||
'container.xml' : True,
|
||||
'manifest.xml' : False,
|
||||
'encryption.xml' : False,
|
||||
'metadata.xml' : False,
|
||||
'signatures.xml' : False,
|
||||
'rights.xml' : False,
|
||||
'container.xml': True,
|
||||
'manifest.xml': False,
|
||||
'encryption.xml': False,
|
||||
'metadata.xml': False,
|
||||
'signatures.xml': False,
|
||||
'rights.xml': False,
|
||||
}
|
||||
|
||||
def __init__(self, pathtoepub, log):
|
||||
self.pathtoepub = pathtoepub
|
||||
tdir = self.root = PersistentTemporaryDirectory('_epub_container')
|
||||
tdir = self.root = os.path.abspath(os.path.realpath(PersistentTemporaryDirectory('_epub_container')))
|
||||
with open(self.pathtoepub, 'rb') as stream:
|
||||
try:
|
||||
zf = ZipFile(stream)
|
||||
@ -616,7 +617,7 @@ class AZW3Container(Container):
|
||||
|
||||
def __init__(self, pathtoazw3, log):
|
||||
self.pathtoazw3 = pathtoazw3
|
||||
tdir = self.root = PersistentTemporaryDirectory('_azw3_container')
|
||||
tdir = self.root = os.path.abspath(os.path.realpath(PersistentTemporaryDirectory('_azw3_container')))
|
||||
with open(pathtoazw3, 'rb') as stream:
|
||||
raw = stream.read(3)
|
||||
if raw == b'TPZ':
|
||||
@ -670,7 +671,8 @@ class AZW3Container(Container):
|
||||
# }}}
|
||||
|
||||
def get_container(path, log=None):
|
||||
if log is None: log = default_log
|
||||
if log is None:
|
||||
log = default_log
|
||||
ebook = (AZW3Container if path.rpartition('.')[-1].lower() in {'azw3', 'mobi'}
|
||||
else EpubContainer)(path, log)
|
||||
return ebook
|
||||
|