mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Pull from trunk
This commit is contained in:
commit
9a0f97c6aa
@ -4,6 +4,99 @@
|
|||||||
# for important features/bug fixes.
|
# for important features/bug fixes.
|
||||||
# Also, each release can have new and improved recipes.
|
# Also, each release can have new and improved recipes.
|
||||||
|
|
||||||
|
- version: 0.7.20
|
||||||
|
date: 2010-09-24
|
||||||
|
|
||||||
|
|
||||||
|
new features:
|
||||||
|
- title: "Tweak epub feature."
|
||||||
|
type: major
|
||||||
|
description: >
|
||||||
|
"Now you can conveniently browse the contents of an epub, tweak them and rebuild the epub within your calibre library
|
||||||
|
by right clicking on the book and selecting Tweak ePub. See http://www.mobileread.com/forums/showthread.php?t=99875
|
||||||
|
for details."
|
||||||
|
|
||||||
|
- title: "Add button to Edit metadata dialog to trim borders from the cover"
|
||||||
|
|
||||||
|
- title: "Kobo driver: Add support for setting the ReadStatus to Read and correctly deal with empty collections"
|
||||||
|
|
||||||
|
- title: "Improved algorithm for removal of hyphens during pre-processing"
|
||||||
|
|
||||||
|
- title: "EPUB metadata: Don't read timestamp value from epubs as I am sick of closing bugs about adding books and having the Date not be today."
|
||||||
|
|
||||||
|
- title: "After bulk edit metadata, reselect previously selected books."
|
||||||
|
|
||||||
|
bug fixes:
|
||||||
|
- title: "Fix regression in 0.7.19 that broke the By Author and By Title category listing in Stanza/Aldiko feeds."
|
||||||
|
|
||||||
|
- title: "MOBI Output: Fix regression that broke sections list in downloaded periodicals on Kindle for non-english news sources"
|
||||||
|
|
||||||
|
- title: "News download: Rationalize cover processing."
|
||||||
|
tickets: [6852]
|
||||||
|
|
||||||
|
- title: "Cover cache: load images only in the GUI thread to prevent stale files being leftover by set_path due to Windows file locking"
|
||||||
|
|
||||||
|
- title: "Database: Make renaming of folders on case change more robust"
|
||||||
|
tickets: [6914]
|
||||||
|
|
||||||
|
- title: "When adding/replacing files to/in EPUB files, set the GPF bit for all files in the archive, to prevent unzip from complaining in linux"
|
||||||
|
tickets: [6363]
|
||||||
|
|
||||||
|
- title: "Plugin loading: Handle encoding declarations in .py files correctly"
|
||||||
|
|
||||||
|
- title: "MOBI input: Another corner case"
|
||||||
|
tickets: [6909]
|
||||||
|
|
||||||
|
- title: "IPC: Store results file in the calibre temp dir and also don't die if for some reason removing result file fails. Should make adding/saving more robust"
|
||||||
|
|
||||||
|
- title: "Database: Fix regression that caused has_cover to create empty directories unnecessarily"
|
||||||
|
|
||||||
|
- title: "Detection of Alex on unix"
|
||||||
|
tickets: [5900]
|
||||||
|
|
||||||
|
- title: "News download: Don't add inline table of contents when downloading news for the Kindle"
|
||||||
|
|
||||||
|
- title: "Add prologue and epilogue to default chapter detection regex"
|
||||||
|
|
||||||
|
- title: "Kobo driver: Fix issue where books that are read were getting their status reset to Unread"
|
||||||
|
|
||||||
|
- title: "Device drivers: Fix occasional false positive when matching books on device with books in the calibre library"
|
||||||
|
|
||||||
|
- title: "Content server: Make serving of large files more efficient."
|
||||||
|
|
||||||
|
- title: "GUI device detection: Handle case when user yanks connected device before device connection handler is called."
|
||||||
|
tickets: [6864]
|
||||||
|
|
||||||
|
- title: "Strip leading/trailing whitespace when setting metadata using the edit metadata dialog"
|
||||||
|
tickets: [6854]
|
||||||
|
|
||||||
|
- title: "KOBO: Editing the Im_Reading list with SD Card installed fixed"
|
||||||
|
tickets: [6850]
|
||||||
|
|
||||||
|
new recipes:
|
||||||
|
- title: "Neal's Nuze and Popular Science"
|
||||||
|
author: Tony Stegall
|
||||||
|
|
||||||
|
- title: "Rmf24.pl"
|
||||||
|
author: "Tomasz Dlugosz"
|
||||||
|
|
||||||
|
- title: "Gazeta Pomorska"
|
||||||
|
author: "Richard"
|
||||||
|
|
||||||
|
- title: "Le Journal de Montreal and superesportes"
|
||||||
|
author: "Luciano Furtado"
|
||||||
|
|
||||||
|
- title: "The Marker"
|
||||||
|
author: Marbs
|
||||||
|
|
||||||
|
- title: "Tagesanzeiger"
|
||||||
|
author: noxxx
|
||||||
|
|
||||||
|
|
||||||
|
improved recipes:
|
||||||
|
- Danas
|
||||||
|
- Harvard Business Review
|
||||||
|
|
||||||
- version: 0.7.19
|
- version: 0.7.19
|
||||||
date: 2010-09-17
|
date: 2010-09-17
|
||||||
|
|
||||||
@ -61,6 +154,7 @@
|
|||||||
|
|
||||||
- title: "PDB Input: Fix bug in conversion of TOC in some PML files"
|
- title: "PDB Input: Fix bug in conversion of TOC in some PML files"
|
||||||
|
|
||||||
|
|
||||||
new recipes:
|
new recipes:
|
||||||
- title: "taz.de RSS"
|
- title: "taz.de RSS"
|
||||||
author: Alexander Schremmer
|
author: Alexander Schremmer
|
||||||
@ -272,7 +366,7 @@
|
|||||||
new features:
|
new features:
|
||||||
- title: "Multiple library support: Various improvements to make using multiple calibre libraries easier."
|
- title: "Multiple library support: Various improvements to make using multiple calibre libraries easier."
|
||||||
type: major
|
type: major
|
||||||
desc: >
|
description: >
|
||||||
"Now, when you switch libraries using the Choose Library button on the toolbar, entries are created in the menu of that button to easily switch to that library in the
|
"Now, when you switch libraries using the Choose Library button on the toolbar, entries are created in the menu of that button to easily switch to that library in the
|
||||||
future. Also, you can now right click on a book in the calibre library and use the 'Copy to library' action to copy the book to another library,
|
future. Also, you can now right click on a book in the calibre library and use the 'Copy to library' action to copy the book to another library,
|
||||||
that you have switched to at least once. The name of the current library is shown in the titlebar.
|
that you have switched to at least once. The name of the current library is shown in the titlebar.
|
||||||
@ -280,7 +374,7 @@
|
|||||||
|
|
||||||
- title: "Content server: Allow setting a restriction so that the server shares only some of the books in the library."
|
- title: "Content server: Allow setting a restriction so that the server shares only some of the books in the library."
|
||||||
type: major
|
type: major
|
||||||
desc: >
|
description: >
|
||||||
"You can now use a Saved Search as a restriction for the content server, via Preferences->Content Server. This will cause the
|
"You can now use a Saved Search as a restriction for the content server, via Preferences->Content Server. This will cause the
|
||||||
server to share only those books that match the saved search.
|
server to share only those books that match the saved search.
|
||||||
"
|
"
|
||||||
|
@ -54,7 +54,7 @@ function render_book(book) {
|
|||||||
formats = book.attr("formats").split(",");
|
formats = book.attr("formats").split(",");
|
||||||
if (formats.length > 0) {
|
if (formats.length > 0) {
|
||||||
for (i=0; i < formats.length; i++) {
|
for (i=0; i < formats.length; i++) {
|
||||||
title += '<a title="Download in '+formats[i]+' format" class="format" href="'+format_url(formats[i], id, book.attr("title"))+'">'+formats[i]+'</a>, ';
|
title += '<a title="Download in '+formats[i]+' format" class="format" href="'+format_url(formats[i], id, book.attr("safe_title"))+'">'+formats[i]+'</a>, ';
|
||||||
}
|
}
|
||||||
title = title.slice(0, title.length-2);
|
title = title.slice(0, title.length-2);
|
||||||
title += ' ({0} MB) '.format(size);
|
title += ' ({0} MB) '.format(size);
|
||||||
|
BIN
resources/images/news/howtogeek.png
Normal file
BIN
resources/images/news/howtogeek.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 922 B |
BIN
resources/images/news/jpost_fr.png
Normal file
BIN
resources/images/news/jpost_fr.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 334 B |
@ -1,4 +1,5 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, re
|
||||||
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
|
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
|
||||||
title = 'Nealz Nuze'
|
title = 'Nealz Nuze'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
@ -6,16 +7,16 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
|
|||||||
description = 'Neal Boortz Show Radio Notes'
|
description = 'Neal Boortz Show Radio Notes'
|
||||||
publisher = 'Neal Boortz'
|
publisher = 'Neal Boortz'
|
||||||
category = 'news, politics, USA, talkshow'
|
category = 'news, politics, USA, talkshow'
|
||||||
oldest_article = 1
|
oldest_article = 2
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
linearize_tables = True
|
linearize_tables = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
|
||||||
masthead_url = 'http://boortz.com/images/nuze_logo.gif'
|
masthead_url = 'http://boortz.com/images/nuze_logo.gif'
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'id':['SiteContent']})
|
dict(name='td', attrs={'id':['contentWellCell']})
|
||||||
#,dict(attrs={'id':['cxArticleText']})
|
|
||||||
]
|
]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='a', attrs={'class':['blogPermalink']}),
|
dict(name='a', attrs={'class':['blogPermalink']}),
|
||||||
@ -25,13 +26,13 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
|
|||||||
remove_tags_after = [dict(name='div', attrs={'class':'blogEntryBody'}),]
|
remove_tags_after = [dict(name='div', attrs={'class':'blogEntryBody'}),]
|
||||||
feeds = [
|
feeds = [
|
||||||
('NUZE', 'http://boortz.com/nealz_nuze_rss/rss.xml')
|
('NUZE', 'http://boortz.com/nealz_nuze_rss/rss.xml')
|
||||||
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
40
resources/recipes/howtogeek.recipe
Normal file
40
resources/recipes/howtogeek.recipe
Normal file
@ -0,0 +1,40 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
|
||||||
|
title = 'How To Geek'
|
||||||
|
language = 'en'
|
||||||
|
__author__ = 'TonytheBookworm'
|
||||||
|
description = 'Daily Computer Tips and Tricks'
|
||||||
|
publisher = 'Howtogeek'
|
||||||
|
category = 'PC,tips,tricks'
|
||||||
|
oldest_article = 2
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
linearize_tables = True
|
||||||
|
no_stylesheets = True
|
||||||
|
remove_javascript = True
|
||||||
|
masthead_url = 'http://blog.stackoverflow.com/wp-content/uploads/how-to-geek-logo.png'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
remove_tags =[dict(name='a', attrs={'target':['_blank']}),
|
||||||
|
dict(name='table', attrs={'id':['articleTable']}),
|
||||||
|
dict(name='div', attrs={'class':['feedflare']}),
|
||||||
|
]
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
('Tips', 'http://feeds.howtogeek.com/howtogeek')
|
||||||
|
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
57
resources/recipes/jpost_fr.recipe
Normal file
57
resources/recipes/jpost_fr.recipe
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class JerusalemPost(BasicNewsRecipe):
|
||||||
|
title = 'Jerusalem post'
|
||||||
|
language = 'fr'
|
||||||
|
__author__ = 'TonytheBookworm'
|
||||||
|
description = 'The Jerusalem Post (in French)'
|
||||||
|
publisher = 'jpost'
|
||||||
|
category = 'news'
|
||||||
|
oldest_article = 30
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
linearize_tables = True
|
||||||
|
no_stylesheets = True
|
||||||
|
remove_javascript = True
|
||||||
|
|
||||||
|
masthead_url = 'http://static.jpost.com/JPSITES/images/JFrench/2008/site/jplogo.JFrench.gif'
|
||||||
|
|
||||||
|
remove_tags = [
|
||||||
|
dict(name='a', attrs={'href':['javascript:window.print()']}),
|
||||||
|
dict(name='div', attrs={'class':['bot']}),
|
||||||
|
|
||||||
|
]
|
||||||
|
|
||||||
|
feeds = [
|
||||||
|
('NEWS', 'http://fr.jpost.com/servlet/Satellite?collId=1216805762036&pagename=JFrench%2FPage%2FRSS'),
|
||||||
|
('JFrench En route vers la paix', 'http://fr.jpost.com/servlet/Satellite?collId=1216805762201&pagename=JFrench%2FPage%2FRSS'),
|
||||||
|
('JFrench Politique', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737334&pagename=JFrench%2FPage%2FRSS'),
|
||||||
|
('JFrench Securite', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737338&pagename=JFrench%2FPage%2FRSS'),
|
||||||
|
('JFrench Moyen Orient', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737342&pagename=JFrench%2FPage%2FRSS'),
|
||||||
|
('JFrench Diplomatie / Monde', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737346&pagename=JFrench%2FPage%2FRSS'),
|
||||||
|
('JFrench Economie / Sciences', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737358&pagename=JFrench%2FPage%2FRSS'),
|
||||||
|
('JFrench Societe', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737354&pagename=JFrench%2FPage%2FRSS'),
|
||||||
|
('JFrench Opinions', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737350&pagename=JFrench%2FPage%2FRSS'),
|
||||||
|
('JFrench Monde juif', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737366&pagename=JFrench%2FPage%2FRSS'),
|
||||||
|
('JFrench Culture / Sport', 'http://fr.jpost.com/servlet/Satellite?collId=1215356737362&pagename=JFrench%2FPage%2FRSS')
|
||||||
|
]
|
||||||
|
def print_version(self, url):
|
||||||
|
split1 = url.split("cid=")
|
||||||
|
#for testing only -------
|
||||||
|
#print 'SPLIT IS: ', split1
|
||||||
|
#print 'ORG URL IS: ', url
|
||||||
|
#---------------------------
|
||||||
|
idnum = split1[1] # get the actual value of the id article
|
||||||
|
#for testing only --------------------
|
||||||
|
#print 'the idnum is: ', idnum
|
||||||
|
#--------------------------------------
|
||||||
|
print_url = 'http://fr.jpost.com/servlet/Satellite?cid=' + idnum + '&pagename=JFrench%2FJPArticle%2FPrinter'
|
||||||
|
#for testing only -------------------------
|
||||||
|
#print 'PRINT URL IS: ', print_url
|
||||||
|
#------------------------------------------
|
||||||
|
return print_url
|
||||||
|
|
||||||
|
#example of how links should be formatted
|
||||||
|
#--------------------------------------------------------------------------------------------------------------
|
||||||
|
#org version = http://fr.jpost.com/servlet/Satellite?pagename=JFrench/JPArticle/ShowFull&cid=1282804806075
|
||||||
|
#print version = http://fr.jpost.com/servlet/Satellite?cid=1282804806075&pagename=JFrench%2FJPArticle%2FPrinter
|
||||||
|
#------------------------------------------------------------------------------------------------------------------
|
@ -1,5 +1,5 @@
|
|||||||
import re
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, re
|
||||||
|
|
||||||
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
|
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
|
||||||
title = 'Popular Science'
|
title = 'Popular Science'
|
||||||
@ -12,38 +12,36 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
|
|||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
|
use_embedded_content = True
|
||||||
|
|
||||||
masthead_url = 'http://www.raytheon.com/newsroom/rtnwcm/groups/Public/documents/masthead/rtn08_popscidec_masthead.jpg'
|
masthead_url = 'http://www.raytheon.com/newsroom/rtnwcm/groups/Public/documents/masthead/rtn08_popscidec_masthead.jpg'
|
||||||
|
|
||||||
remove_tags = [dict(name='div', attrs={'id':['toolbar','main_supplements']}),
|
|
||||||
dict(name='span', attrs={'class':['comments']}),
|
|
||||||
dict(name='div', attrs={'class':['relatedinfo related-right','node_navigation','content2']}),
|
|
||||||
dict(name='ul', attrs={'class':['item-list clear-block']})]
|
|
||||||
feeds = [
|
feeds = [
|
||||||
|
|
||||||
('Gadgets', 'http://www.popsci.com/full-feed/gadgets'),
|
('Gadgets', 'http://www.popsci.com/full-feed/gadgets'),
|
||||||
('Cars', 'http://www.popsci.com/full-feed/cars'),
|
('Cars', 'http://www.popsci.com/full-feed/cars'),
|
||||||
('Science', 'http://www.popsci.com/full-feed/science'),
|
('Science', 'http://www.popsci.com/full-feed/science'),
|
||||||
('Technology', 'http://www.popsci.com/full-feed/technology'),
|
('Technology', 'http://www.popsci.com/full-feed/technology'),
|
||||||
('DIY', 'http://www.popsci.com/full-feed/diy'),
|
('DIY', 'http://www.popsci.com/full-feed/diy'),
|
||||||
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
#The following will get rid of the Gallery: links when found
|
#The following will get rid of the Gallery: links when found
|
||||||
|
|
||||||
def preprocess_html(self, soup) :
|
def preprocess_html(self, soup) :
|
||||||
print 'SOUP IS: ', soup
|
print 'SOUP IS: ', soup
|
||||||
weblinks = soup.findAll(['head','h2'])
|
weblinks = soup.findAll(['head','h2'])
|
||||||
if weblinks is not None:
|
if weblinks is not None:
|
||||||
for link in weblinks:
|
for link in weblinks:
|
||||||
if re.search('(Gallery)(:)',str(link)):
|
if re.search('(Gallery)(:)',str(link)):
|
||||||
|
|
||||||
link.parent.extract()
|
link.parent.extract()
|
||||||
return soup
|
return soup
|
||||||
#-----------------------------------------------------------------
|
#-----------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,78 +1,91 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
|
||||||
__docformat__ = 'restructuredtext en'
|
|
||||||
|
|
||||||
'''
|
|
||||||
sciam.com
|
|
||||||
'''
|
|
||||||
import re
|
import re
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
class ScientificAmerican(BasicNewsRecipe):
|
class ScientificAmerican(BasicNewsRecipe):
|
||||||
title = u'Scientific American'
|
title = u'Scientific American'
|
||||||
description = u'Popular science. Monthly magazine.'
|
description = u'Popular Science. Monthly magazine.'
|
||||||
__author__ = 'Kovid Goyal'
|
category = 'science'
|
||||||
language = 'en'
|
__author__ = 'Starson17'
|
||||||
remove_javascript = True
|
no_stylesheets = True
|
||||||
encoding = 'utf-8'
|
use_embedded_content = False
|
||||||
|
language = 'en'
|
||||||
|
publisher = 'Nature Publishing Group'
|
||||||
|
remove_empty_feeds = True
|
||||||
|
remove_javascript = True
|
||||||
|
oldest_article = 30
|
||||||
|
max_articles_per_feed = 100
|
||||||
|
|
||||||
def print_version(self, url):
|
conversion_options = {'linearize_tables' : True
|
||||||
return url + '&print=true'
|
, 'comment' : description
|
||||||
|
, 'tags' : category
|
||||||
|
, 'publisher' : publisher
|
||||||
|
, 'language' : language
|
||||||
|
}
|
||||||
|
|
||||||
|
keep_only_tags = [
|
||||||
|
dict(name='h2', attrs={'class':'articleTitle'})
|
||||||
|
,dict(name='p', attrs={'id':'articleDek'})
|
||||||
|
,dict(name='p', attrs={'class':'articleInfo'})
|
||||||
|
,dict(name='div', attrs={'id':['articleContent']})
|
||||||
|
,dict(name='img', attrs={'src':re.compile(r'/media/inline/blog/Image/', re.DOTALL|re.IGNORECASE)})
|
||||||
|
]
|
||||||
|
|
||||||
|
remove_tags = [dict(name='a', attrs={'class':'tinyCommentCount'})]
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
soup = self.index_to_soup('http://www.scientificamerican.com/sciammag/')
|
soup = self.index_to_soup('http://www.scientificamerican.com/sciammag/')
|
||||||
month = self.tag_to_string(soup.find('p',attrs={'id':'articleDek'}))
|
issuetag = soup.find('p',attrs={'id':'articleDek'})
|
||||||
self.timefmt = ' [%s]'%(' '.join(month.strip().split()[:2]))
|
self.timefmt = ' [%s]'%(self.tag_to_string(issuetag))
|
||||||
img = soup.find('img', alt='Scientific American Magazine', src=True)
|
img = soup.find('img', alt='Scientific American Magazine', src=True)
|
||||||
if img is not None:
|
if img is not None:
|
||||||
self.cover_url = img['src']
|
self.cover_url = img['src']
|
||||||
|
features, feeds = [], []
|
||||||
feeds = []
|
for a in soup.find(attrs={'class':'primaryCol'}).findAll('a',attrs={'title':'Feature'}):
|
||||||
for div in soup.findAll('div', attrs={'class':['primaryCol',
|
if a is None: continue
|
||||||
'secondaryCol']}):
|
desc = ''
|
||||||
current_section = None
|
s = a.parent.parent.find(attrs={'class':'dek'})
|
||||||
for tag in div.findAll(['h2', 'ul']):
|
desc = self.tag_to_string(s)
|
||||||
if tag.name == 'h2':
|
article = {
|
||||||
current_section = self.tag_to_string(tag).strip()
|
'url' : a['href'],
|
||||||
self.log('\tFound section:', current_section)
|
'title' : self.tag_to_string(a),
|
||||||
elif current_section is not None and tag.name == 'ul':
|
'date' : '',
|
||||||
articles = []
|
'description' : desc,
|
||||||
for li in tag.findAll('li'):
|
}
|
||||||
t = li.findAll('a',
|
features.append(article)
|
||||||
attrs={'class':lambda x: x != 'thumb'},
|
feeds.append(('Features', features))
|
||||||
href=lambda x: x and 'article.cfm' in x)
|
department = []
|
||||||
if not t:
|
title = None
|
||||||
continue
|
for li in soup.find(attrs={'class':'secondaryCol'}).findAll('li'):
|
||||||
t = t[-1]
|
if 'department.cfm' in li.a['href']:
|
||||||
title = self.tag_to_string(t)
|
if department:
|
||||||
url = t['href']
|
feeds.append((title, department))
|
||||||
desc = ''
|
title = self.tag_to_string(li.a)
|
||||||
p = li.find(attrs={'class':'dek'})
|
department = []
|
||||||
if p is not None:
|
if 'article.cfm' in li.h3.a['href']:
|
||||||
desc = self.tag_to_string(p)
|
article = {
|
||||||
articles.append({'title':title, 'url':url,
|
'url' : li.h3.a['href'],
|
||||||
'description':desc, 'date':''})
|
'title' : self.tag_to_string(li.h3.a),
|
||||||
self.log('\t\tFound article:', title, '\n\t\tat', url)
|
'date': '',
|
||||||
if articles:
|
'description': self.tag_to_string(li.p),
|
||||||
feeds.append((current_section, articles))
|
}
|
||||||
current_section = None
|
department.append(article)
|
||||||
|
if department:
|
||||||
|
feeds.append((title, department))
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
def postprocess_html(self, soup, first_fetch):
|
def postprocess_html(self, soup, first_fetch):
|
||||||
if soup is not None:
|
for item in soup.findAll('a'):
|
||||||
for span in soup.findAll('span', attrs={'class':'pagination'}):
|
if 'topic.cfm' in item['href']:
|
||||||
span.extract()
|
item.replaceWith(item.string)
|
||||||
if not first_fetch:
|
|
||||||
div = soup.find('div', attrs={'class':'headline'})
|
|
||||||
if div:
|
|
||||||
div.extract()
|
|
||||||
|
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
preprocess_regexps = [
|
extra_css = '''
|
||||||
(re.compile(r'Already a Digital subscriber.*Now</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),
|
p{font-weight: normal; font-size:small}
|
||||||
(re.compile(r'If your institution has site license access, enter.*here</a>.', re.DOTALL|re.IGNORECASE), lambda match: ''),
|
li{font-weight: normal; font-size:small}
|
||||||
(re.compile(r'to subscribe to our.*;.*\}', re.DOTALL|re.IGNORECASE), lambda match: ''),
|
.headline p{font-size:x-small; font-family:Arial,Helvetica,sans-serif;}
|
||||||
(re.compile(r'\)\(jQuery\);.*-->', re.DOTALL|re.IGNORECASE), lambda match: ''),
|
h2{font-size:large; font-family:Arial,Helvetica,sans-serif;}
|
||||||
]
|
h3{font-size:x-small;font-family:Arial,Helvetica,sans-serif;}
|
||||||
|
'''
|
||||||
|
@ -389,6 +389,7 @@ class HTMLPreProcessor(object):
|
|||||||
if is_pdftohtml:
|
if is_pdftohtml:
|
||||||
end_rules.append((re.compile(r'<p>\s*(?P<chap>(<[ibu]>){0,2}\s*([A-Z \'"!]{3,})\s*([\dA-Z:]+\s){0,4}\s*(</[ibu]>){0,2})\s*<p>\s*(?P<title>(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*<p>)?'), chap_head),)
|
end_rules.append((re.compile(r'<p>\s*(?P<chap>(<[ibu]>){0,2}\s*([A-Z \'"!]{3,})\s*([\dA-Z:]+\s){0,4}\s*(</[ibu]>){0,2})\s*<p>\s*(?P<title>(<[ibu]>){0,2}(\s*\w+){1,4}\s*(</[ibu]>){0,2}\s*<p>)?'), chap_head),)
|
||||||
|
|
||||||
|
length = -1
|
||||||
if getattr(self.extra_opts, 'unwrap_factor', 0.0) > 0.01:
|
if getattr(self.extra_opts, 'unwrap_factor', 0.0) > 0.01:
|
||||||
length = line_length('pdf', html, getattr(self.extra_opts, 'unwrap_factor'))
|
length = line_length('pdf', html, getattr(self.extra_opts, 'unwrap_factor'))
|
||||||
if length:
|
if length:
|
||||||
@ -425,7 +426,7 @@ class HTMLPreProcessor(object):
|
|||||||
for rule in rules + end_rules:
|
for rule in rules + end_rules:
|
||||||
html = rule[0].sub(rule[1], html)
|
html = rule[0].sub(rule[1], html)
|
||||||
|
|
||||||
if is_pdftohtml:
|
if is_pdftohtml and length > -1:
|
||||||
# Dehyphenate
|
# Dehyphenate
|
||||||
dehyphenator = Dehyphenator()
|
dehyphenator = Dehyphenator()
|
||||||
html = dehyphenator(html,'pdf', length)
|
html = dehyphenator(html,'pdf', length)
|
||||||
|
@ -819,7 +819,8 @@ class MetadataSingleDialog(ResizableDialog, Ui_MetadataSingleDialog):
|
|||||||
fname = err.filename if err.filename else 'file'
|
fname = err.filename if err.filename else 'file'
|
||||||
return error_dialog(self, _('Permission denied'),
|
return error_dialog(self, _('Permission denied'),
|
||||||
_('Could not open %s. Is it being used by another'
|
_('Could not open %s. Is it being used by another'
|
||||||
' program?')%fname, show=True)
|
' program?')%fname, det_msg=traceback.format_exc(),
|
||||||
|
show=True)
|
||||||
raise
|
raise
|
||||||
self.save_state()
|
self.save_state()
|
||||||
QDialog.accept(self)
|
QDialog.accept(self)
|
||||||
|
@ -870,7 +870,13 @@ class LibraryDatabase2(LibraryDatabase, SchemaUpgrade, CustomColumns):
|
|||||||
path = self.format_abspath(index, format, index_is_id=index_is_id)
|
path = self.format_abspath(index, format, index_is_id=index_is_id)
|
||||||
if path is not None:
|
if path is not None:
|
||||||
f = open(path, mode)
|
f = open(path, mode)
|
||||||
ret = f if as_file else f.read()
|
try:
|
||||||
|
ret = f if as_file else f.read()
|
||||||
|
except IOError:
|
||||||
|
f.seek(0)
|
||||||
|
out = cStringIO.StringIO()
|
||||||
|
shutil.copyfileobj(f, out)
|
||||||
|
ret = out.getvalue()
|
||||||
if not as_file:
|
if not as_file:
|
||||||
f.close()
|
f.close()
|
||||||
return ret
|
return ret
|
||||||
|
@ -123,8 +123,6 @@ class ContentServer(object):
|
|||||||
|
|
||||||
return self.static('index.html')
|
return self.static('index.html')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Actually get content from the database {{{
|
# Actually get content from the database {{{
|
||||||
def get_cover(self, id, thumbnail=False):
|
def get_cover(self, id, thumbnail=False):
|
||||||
cover = self.db.cover(id, index_is_id=True, as_file=False)
|
cover = self.db.cover(id, index_is_id=True, as_file=False)
|
||||||
|
@ -19,6 +19,7 @@ from calibre.ebooks.metadata import fmt_sidx
|
|||||||
from calibre.constants import __appname__
|
from calibre.constants import __appname__
|
||||||
from calibre import human_readable
|
from calibre import human_readable
|
||||||
from calibre.utils.date import utcfromtimestamp
|
from calibre.utils.date import utcfromtimestamp
|
||||||
|
from calibre.utils.filenames import ascii_filename
|
||||||
|
|
||||||
def CLASS(*args, **kwargs): # class is a reserved word in Python
|
def CLASS(*args, **kwargs): # class is a reserved word in Python
|
||||||
kwargs['class'] = ' '.join(args)
|
kwargs['class'] = ' '.join(args)
|
||||||
@ -111,11 +112,13 @@ def build_index(books, num, search, sort, order, start, total, url_base, CKEYS):
|
|||||||
data = TD()
|
data = TD()
|
||||||
last = None
|
last = None
|
||||||
for fmt in book['formats'].split(','):
|
for fmt in book['formats'].split(','):
|
||||||
|
a = ascii_filename(book['authors'])
|
||||||
|
t = ascii_filename(book['title'])
|
||||||
s = SPAN(
|
s = SPAN(
|
||||||
A(
|
A(
|
||||||
fmt.lower(),
|
fmt.lower(),
|
||||||
href='/get/%s/%s-%s_%d.%s' % (fmt, book['authors'],
|
href='/get/%s/%s-%s_%d.%s' % (fmt, a, t,
|
||||||
book['title'], book['id'], fmt)
|
book['id'], fmt)
|
||||||
),
|
),
|
||||||
CLASS('button'))
|
CLASS('button'))
|
||||||
s.tail = u'\u202f' #
|
s.tail = u'\u202f' #
|
||||||
|
@ -16,6 +16,7 @@ from calibre.library.server.utils import strftime, format_tag_string
|
|||||||
from calibre.ebooks.metadata import fmt_sidx
|
from calibre.ebooks.metadata import fmt_sidx
|
||||||
from calibre.constants import preferred_encoding
|
from calibre.constants import preferred_encoding
|
||||||
from calibre import isbytestring
|
from calibre import isbytestring
|
||||||
|
from calibre.utils.filenames import ascii_filename
|
||||||
|
|
||||||
E = ElementMaker()
|
E = ElementMaker()
|
||||||
|
|
||||||
@ -92,6 +93,8 @@ class XMLServer(object):
|
|||||||
y = format_tag_string(y, ',', ignore_max=True)
|
y = format_tag_string(y, ',', ignore_max=True)
|
||||||
kwargs[x] = serialize(y) if y else ''
|
kwargs[x] = serialize(y) if y else ''
|
||||||
|
|
||||||
|
kwargs['safe_title'] = ascii_filename(kwargs['title'])
|
||||||
|
|
||||||
c = kwargs.pop('comments')
|
c = kwargs.pop('comments')
|
||||||
|
|
||||||
CFM = self.db.field_metadata
|
CFM = self.db.field_metadata
|
||||||
|
File diff suppressed because it is too large
Load Diff
Loading…
x
Reference in New Issue
Block a user