mirror of https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00

commit e0e2a0bf40
Merge from trunk
@@ -15,6 +15,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
     max_articles_per_feed = 20
     remove_empty_feeds = True
     remove_javascript = True
+    ignore_duplicate_articles = {'title'}

     preprocess_regexps = [
         (re.compile(r'<!-- Begin tmpl module_competition_offer -->.*?<!-- End tmpl module_competition_offer-->', re.IGNORECASE | re.DOTALL), lambda match: '')]
@@ -1,11 +1,13 @@
 from calibre import browser
 from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
 class AdvancedUserRecipe1325006965(BasicNewsRecipe):
     title = u'Countryfile.com'
     #cover_url = 'http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/2_1.jpg'
     __author__ = 'Dave Asbury'
     description = 'The official website of Countryfile Magazine'
-    # last updated 9/9//12
+    # last updated 7/10/12
     language = 'en_GB'
     oldest_article = 30
     max_articles_per_feed = 25
@@ -13,12 +15,14 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
     no_stylesheets = True
     auto_cleanup = True
     #articles_are_obfuscated = True
+    ignore_duplicate_articles = {'title'}
+
     def get_cover_url(self):
         soup = self.index_to_soup('http://www.countryfile.com/')
-        cov = soup.find(attrs={'class' : 'imagecache imagecache-160px_wide imagecache-linked imagecache-160px_wide_linked'})
+        cov = soup.find(attrs={'width' : '160', 'class' : re.compile('imagecache imagecache-160px_wide')})
         print '******** ',cov,' ***'
         cov2 = str(cov)
-        cov2=cov2[140:223]
+        cov2=cov2[10:101]
         print '******** ',cov2,' ***'
         #cov2='http://www.countryfile.com/sites/default/files/imagecache/160px_wide/cover/1b_0.jpg'
         # try to get cover - if can't get known cover
@@ -40,3 +44,6 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
         (u'Country News', u'http://www.countryfile.com/rss/news'),
         (u'Countryside', u'http://www.countryfile.com/rss/countryside'),
     ]
+
+
+
@@ -72,7 +72,7 @@ class DerSpiegel(BasicNewsRecipe):
             for article in section.findNextSiblings(['dd','dt']):
                 if article.name == 'dt':
                     break
-                link = article.find('a')
+                link = article.find('a', href=True)
                 title = self.tag_to_string(link).strip()
                 if title in self.empty_articles:
                     continue
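Note on the hunk above: with href=True, BeautifulSoup only matches <a> tags that actually carry an href attribute, so named anchors like <a name="x"> can no longer be picked up as the article link. A minimal sketch of the difference (the markup here is invented for illustration, not taken from the recipe):

    from calibre.ebooks.BeautifulSoup import BeautifulSoup

    # The first <a> is a named anchor with no href.
    html = '<dd><a name="top"></a><a href="/article/1">Ein Artikel</a></dd>'
    soup = BeautifulSoup(html)

    print soup.find('a')             # matches <a name="top"></a>, no usable URL
    print soup.find('a', href=True)  # matches <a href="/article/1">...</a>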
@@ -1,5 +1,6 @@
 from calibre.web.feeds.news import BasicNewsRecipe

+
 class AdvancedUserRecipe1325006965(BasicNewsRecipe):
     title = u'FHM UK'
     description = 'Good News for Men.'
@@ -7,14 +8,15 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
     # cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/373529_38324934806_64930243_n.jpg'
     masthead_url = 'http://www.fhm.com/App_Resources/Images/Site/re-design/logo.gif'
     __author__ = 'Dave Asbury'
-    # last updated 1/7/12
+    # last updated 7/10/12
     language = 'en_GB'
-    oldest_article = 28
-    max_articles_per_feed = 8
+    oldest_article = 31
+    max_articles_per_feed = 15
     remove_empty_feeds = True
     no_stylesheets = True
     #auto_cleanup = True
     # articles_are_obfuscated = True
+
     keep_only_tags = [
         dict(name='h1'),
         dict(name='img',attrs={'id' : 'ctl00_Body_imgMainImage'}),
@@ -28,15 +30,13 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):

     ]
     feeds = [
-        (u'Homepage 1',u'http://feed43.com/6655867614547036.xml'),
-        (u'Homepage 2',u'http://feed43.com/4167731873103110.xml'),
-        (u'Homepage 3',u'http://feed43.com/7667138788771570.xml'),
-        (u'Homepage 4',u'http://feed43.com/6550421522527341.xml'),
-        (u'Funny - The Very Best Of The Internet',u'http://feed43.com/4538510106331565.xml'),
-        (u'Gaming',u'http://feed43.com/6537162612465672.xml'),
-        (u'Girls',u'http://feed43.com/4574262733341068.xml'),# edit link http://feed43.com/feed.html?name=4574262733341068
+        # repeatable search = </div>{|}<a href="{%}" class="{*}">{%}</a>{|}<p>{*}</p>
+        (u'Homepage',u'http://rss.feedsportal.com/c/375/f/434908/index.rss'),
+        (u'Funny',u'http://rss.feedsportal.com/c/375/f/434910/index.rss'),
+        (u'Girls',u'http://rss.feedsportal.com/c/375/f/434913/index.rss'),
     ]

+
     extra_css = '''
         h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
         h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
@@ -4,7 +4,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
     title = u'New Musical Express Magazine'
     description = 'Author D.Asbury. UK Rock & Pop Mag. '
     __author__ = 'Dave Asbury'
-    # last updated 9/6/12
+    # last updated 7/10/12
     remove_empty_feeds = True
     remove_javascript = True
     no_stylesheets = True
@@ -14,15 +14,13 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
     language = 'en_GB'

     def get_cover_url(self):
-        soup = self.index_to_soup('http://www.magazinesdirect.com/categories/mens/tv-and-music/')
-        cov = soup.find(attrs={'title' : 'NME magazine subscriptions'})
-        cov2 = 'http://www.magazinesdirect.com'+cov['src']
-        print '***cov = ',cov2,' ***'
-        cover_url = str(cov2)
+        soup = self.index_to_soup('http://www.nme.com/component/subscribe')
+        cov = soup.find(attrs={'id' : 'magazine_cover'})
+        cov2 = str(cov['src'])

         # print '**** Cov url =*', cover_url,'***'
         #print '**** Cov url =*','http://www.magazinesdirect.com/article_images/articledir_3138/1569221/1_largelisting.jpg','***'

         br = browser()
         br.set_handle_redirect(False)
         try:
@@ -31,8 +29,8 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
         except:
             cover_url = 'http://tawanda3000.files.wordpress.com/2011/02/nme-logo.jpg'
         return cover_url
-    masthead_url = 'http://tawanda3000.files.wordpress.com/2011/02/nme-logo.jpg'

+    masthead_url = 'http://tawanda3000.files.wordpress.com/2011/02/nme-logo.jpg'

     remove_tags = [
         dict( attrs={'class':'clear_icons'}),
@@ -61,9 +59,15 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):


     feeds = [
-        (u'NME News', u'http://feeds2.feedburner.com/nmecom/rss/newsxml'),
+        (u'NME News', u'http://feeds.feedburner.com/nmecom/rss/newsxml?format=xml'),
         #(u'Reviews', u'http://feeds2.feedburner.com/nme/SdML'),
-        (u'Reviews',u'http://feed43.com/4138608576351646.xml'),
+        (u'Reviews',u'http://feed43.com/1817687144061333.xml'),
         (u'Bloggs',u'http://feed43.com/3326754333186048.xml'),

     ]
+    extra_css = '''
+        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
+        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
+        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
+        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
+    '''
recipes/pvp_online.recipe (new file, 18 lines)
@@ -0,0 +1,18 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1344926684(BasicNewsRecipe):
+    title = u'PVP online'
+    __author__ = 'Krittika Goyal'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    #auto_cleanup = True
+    no_stylesheets = True
+    use_embedded_content = False
+    language = 'en'
+    remove_javascript = True
+
+    keep_only_tags = [dict(name='div', attrs={'class':'body'})]
+    remove_tags = [dict(name='div', attrs={'class':'prevBg'}),dict(name='div', attrs={'class':'nextBg'}),dict(name='div', attrs={'class':'postMeta'})]
+
+    feeds = [(u'Comics', u'http://pvponline.com/feed'), ]
+
@@ -5,13 +5,15 @@ class AdvancedUserRecipe1324663493(BasicNewsRecipe):
     title = u'Shortlist'
     description = 'Articles From Shortlist.com'
     # I've set oldest article to 7 days as the website updates weekly
-    oldest_article = 7
-    max_articles_per_feed = 12
+    oldest_article = 8
+    max_articles_per_feed = 20
     remove_empty_feeds = True
     remove_javascript = True
     no_stylesheets = True
+    ignore_duplicate_articles = {'title'}
+
     __author__ = 'Dave Asbury'
-    # last updated 19/5/12
+    # last updated 7/10/12
     language = 'en_GB'
     def get_cover_url(self):
         soup = self.index_to_soup('http://www.shortlist.com')
@@ -45,17 +47,16 @@ class AdvancedUserRecipe1324663493(BasicNewsRecipe):
     ]

     feeds = [
-        (u'Home carousel',u'http://feed43.com/7106317222455380.xml'),
-        (u'This Weeks Issue', u'http://feed43.com/0323588208751786.xml'),
-        (u'Cool Stuff',u'http://feed43.com/6253845228768456.xml'),
-        (u'Style',u'http://feed43.com/7217107577215678.xml'),
-        (u'Films',u'http://feed43.com/3101308515277265.xml'),
-        (u'Music',u'http://feed43.com/2416400550560162.xml'),
-        (u'TV',u'http://feed43.com/4781172470717123.xml'),
-        (u'Sport',u'http://feed43.com/5303151885853308.xml'),
-        (u'Gaming',u'http://feed43.com/8883764600355347.xml'),
-        (u'Women',u'http://feed43.com/2648221746514241.xml'),
-        (u'Instant Improver', u'http://feed43.com/1236541026275417.xml'),
-
-        #(u'Articles', u'http://feed43.com/3428534448355545.xml')
+        #edit http://feed43.com/feed.html?name=3156308700147005
+        # repeatable pattern = <h3>{_}<a href="{%}">{%}</a>{*}</h3>
+        (u'This Weeks Issue', u'http://feed43.com/5205766657404804.xml'),
+        (u'Home Page',u'http://feed43.com/3156308700147005.xml'),
+        (u'Cool Stuff',u'http://feed43.com/1557051772026706.xml'),
+        (u'Style',u'http://feed43.com/4168836374571502.xml'),
+        (u'Entertainment',u'http://feed43.com/4578504030588024.xml'),
     ]


@@ -40,6 +40,7 @@ class ANDROID(USBMS):
             0xca4 : HTC_BCDS,
             0xca9 : HTC_BCDS,
             0xcac : HTC_BCDS,
+            0xcba : HTC_BCDS,
             0xccf : HTC_BCDS,
             0xcd6 : HTC_BCDS,
             0xce5 : HTC_BCDS,
@@ -12,19 +12,17 @@ Originally developed by Timothy Legge <timlegge@gmail.com>.
 Extended to support Touch firmware 2.0.0 and later and newer devices by David Forrester <davidfor@internode.on.net>
 '''

-import os, time, calendar
+import os, time
 from contextlib import closing
 from calibre.devices.usbms.books import BookList
 from calibre.devices.usbms.books import CollectionsBookList
 from calibre.devices.kobo.books import KTCollectionsBookList
 from calibre.devices.kobo.books import Book
 from calibre.devices.kobo.books import ImageWrapper
-from calibre.devices.kobo.bookmark import Bookmark
 from calibre.devices.mime import mime_type_ext
 from calibre.devices.usbms.driver import USBMS, debug_print
 from calibre import prints
 from calibre.ptempfile import PersistentTemporaryFile
-
 from calibre.constants import DEBUG
 from calibre.utils.config import prefs

@@ -994,6 +992,7 @@ class KOBO(USBMS):
         return USBMS.create_annotations_path(self, mdata)

     def get_annotations(self, path_map):
+        from calibre.devices.kobo.bookmark import Bookmark
         EPUB_FORMATS = [u'epub']
         epub_formats = set(EPUB_FORMATS)

@@ -1056,6 +1055,7 @@ class KOBO(USBMS):
         return bookmarked_books

     def generate_annotation_html(self, bookmark):
+        import calendar
         from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, NavigableString
         # Returns <div class="user_annotations"> ... </div>
         #last_read_location = bookmark.last_read_location
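The three Kobo hunks pair up: the module-level imports of calendar and Bookmark are dropped, and each is re-imported inside the one method that uses it, so merely loading the driver module stays cheap. A minimal sketch of this deferred-import pattern (class and method names invented for illustration):

    import time

    class LazyDriver(object):

        def last_read_epoch(self, ts):
            # Imported on first call rather than at module import time.
            import calendar
            return calendar.timegm(ts)

    # Usage: LazyDriver().last_read_epoch(time.gmtime())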
@@ -6,15 +6,19 @@ __docformat__ = 'restructuredtext en'
 '''
 Convert an ODT file into a Open Ebook
 '''
-import os
+import os, logging

 from lxml import etree
+from cssutils import CSSParser
+from cssutils.css import CSSRule
+
 from odf.odf2xhtml import ODF2XHTML
 from odf.opendocument import load as odLoad
 from odf.draw import Frame as odFrame, Image as odImage
 from odf.namespaces import TEXTNS as odTEXTNS

 from calibre import CurrentDir, walk
+from calibre.ebooks.oeb.base import _css_logger

 class Extract(ODF2XHTML):

@@ -29,14 +33,14 @@ class Extract(ODF2XHTML):

     def fix_markup(self, html, log):
         root = etree.fromstring(html)
-        self.epubify_markup(root, log)
         self.filter_css(root, log)
-        self.extract_css(root)
+        self.extract_css(root, log)
+        self.epubify_markup(root, log)
         html = etree.tostring(root, encoding='utf-8',
                 xml_declaration=True)
         return html

-    def extract_css(self, root):
+    def extract_css(self, root, log):
         ans = []
         for s in root.xpath('//*[local-name() = "style" and @type="text/css"]'):
             ans.append(s.text)
@@ -51,9 +55,21 @@ class Extract(ODF2XHTML):
             etree.SubElement(head, ns+'link', {'type':'text/css',
                 'rel':'stylesheet', 'href':'odfpy.css'})

-        with open('odfpy.css', 'wb') as f:
-            f.write((u'\n\n'.join(ans)).encode('utf-8'))
+        css = u'\n\n'.join(ans)
+        parser = CSSParser(loglevel=logging.WARNING,
+                log=_css_logger)
+        self.css = parser.parseString(css, validate=False)
+
+        with open('odfpy.css', 'wb') as f:
+            f.write(css.encode('utf-8'))
+
+    def get_css_for_class(self, cls):
+        if not cls: return None
+        for rule in self.css.cssRules.rulesOfType(CSSRule.STYLE_RULE):
+            for sel in rule.selectorList:
+                q = sel.selectorText
+                if q == '.' + cls:
+                    return rule

     def epubify_markup(self, root, log):
         from calibre.ebooks.oeb.base import XPath, XHTML
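A standalone sketch of what the new parseString/get_css_for_class pair does (the stylesheet text is invented; the cssutils calls mirror the hunk above, including the getattr guard the code uses on the returned CSS value):

    import logging
    from cssutils import CSSParser
    from cssutils.css import CSSRule

    css = u'.frame1 { margin-left: auto; margin-right: auto; }'
    sheet = CSSParser(loglevel=logging.WARNING).parseString(css, validate=False)

    for rule in sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE):
        for sel in rule.selectorList:
            if sel.selectorText == '.frame1':
                ml = rule.style.getPropertyCSSValue(u'margin-left')
                print getattr(ml, 'value', None)  # 'auto', as epubify_markup expects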
@@ -84,16 +100,54 @@ class Extract(ODF2XHTML):
                 div.attrib['style'] = style
                 img.attrib['style'] = 'max-width: 100%; max-height: 100%'

-        # A div/div/img construct causes text-align:center to not work in ADE
-        # so set the display of the second div to inline. This should have no
-        # effect (apart from minor vspace issues) in a compliant HTML renderer
-        # but it fixes the centering of the image via a text-align:center on
-        # the first div in ADE
+        # Handle anchored images. The default markup + CSS produced by
+        # odf2xhtml works with WebKit but not with ADE. So we convert the
+        # common cases of left/right/center aligned block images to work on
+        # both webkit and ADE. We detect the case of setting the side margins
+        # to auto and map it to an appropriate text-align directive, which
+        # works in both WebKit and ADE.
+        # https://bugs.launchpad.net/bugs/1063207
+        # https://bugs.launchpad.net/calibre/+bug/859343
         imgpath = XPath('descendant::h:div/h:div/h:img')
         for img in imgpath(root):
             div2 = img.getparent()
             div1 = div2.getparent()
-            if len(div1) == len(div2) == 1:
+            if (len(div1), len(div2)) != (1, 1): continue
+            cls = div1.get('class', '')
+            first_rules = filter(None, [self.get_css_for_class(x) for x in
+                cls.split()])
+            has_align = False
+            for r in first_rules:
+                if r.style.getProperty(u'text-align') is not None:
+                    has_align = True
+            ml = mr = None
+            if not has_align:
+                aval = None
+                cls = div2.get(u'class', u'')
+                rules = filter(None, [self.get_css_for_class(x) for x in
+                    cls.split()])
+                for r in rules:
+                    ml = r.style.getPropertyCSSValue(u'margin-left') or ml
+                    mr = r.style.getPropertyCSSValue(u'margin-right') or mr
+                ml = getattr(ml, 'value', None)
+                mr = getattr(mr, 'value', None)
+                if ml == mr == u'auto':
+                    aval = u'center'
+                elif ml == u'auto' and mr != u'auto':
+                    aval = 'right'
+                elif ml != u'auto' and mr == u'auto':
+                    aval = 'left'
+                if aval is not None:
+                    style = div1.attrib.get('style', '').strip()
+                    if style and not style.endswith(';'):
+                        style = style + ';'
+                    style += 'text-align:%s'%aval
+                    has_align = True
+                    div1.attrib['style'] = style
+
+            if has_align:
+                # This is needed for ADE, without it the text-align has no
+                # effect
                 style = div2.attrib['style']
                 div2.attrib['style'] = 'display:inline;'+style

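The mapping encoded above follows standard CSS behaviour: two auto side margins centre a block, while a single auto margin pushes the block to the opposite side. Distilled into a little helper (the margin values in the asserts are invented):

    def alignment_from_margins(ml, mr):
        # auto/auto -> center, auto/fixed -> right, fixed/auto -> left
        if ml == mr == u'auto':
            return u'center'
        elif ml == u'auto' and mr != u'auto':
            return u'right'
        elif ml != u'auto' and mr == u'auto':
            return u'left'
        return None

    assert alignment_from_margins(u'auto', u'auto') == u'center'
    assert alignment_from_margins(u'auto', u'0pt') == u'right'
    assert alignment_from_margins(u'0pt', u'auto') == u'left'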
@@ -211,23 +211,25 @@ def main(args=sys.argv):
         msg = compose_mail(args[1], args[2], args[3], subject=opts.subject,
                 attachment=opts.attachment)
         from_, to = args[1:3]
-        efrom, eto = map(extract_email_address, (from_, to))
-        eto = [eto]
+        eto = [extract_email_address(x.strip()) for x in to.split(',')]
+        efrom = extract_email_address(from_)
     else:
         msg = sys.stdin.read()
-        from email.parser import Parser
+        from email import message_from_string
         from email.utils import getaddresses
-        eml = Parser.parsestr(msg, headersonly=True)
+        eml = message_from_string(msg)
         tos = eml.get_all('to', [])
-        ccs = eml.get_all('cc', [])
-        eto = getaddresses(tos + ccs)
+        ccs = eml.get_all('cc', []) + eml.get_all('bcc', [])
+        all_tos = []
+        for x in tos + ccs:
+            all_tos.extend(y.strip() for y in x.split(','))
+        eto = list(map(extract_email_address, all_tos))
         if not eto:
             raise ValueError('Email from STDIN does not specify any recipients')
         efrom = getaddresses(eml.get_all('from', []))
         if not efrom:
             raise ValueError('Email from STDIN does not specify a sender')
-        efrom = efrom[0]
+        efrom = efrom[0][1]


     outbox = None
     if opts.outbox is not None:
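Why the recipient handling changed: Message.get_all() returns one string per header line, so several addresses inside a single To: or Cc: line still need splitting on commas, and Bcc: was previously ignored entirely. The old eml = Parser.parsestr(...) also called parsestr on the Parser class rather than an instance, which the stdlib message_from_string() helper sidesteps. A sketch with an invented message (calibre's extract_email_address is then mapped over the result, as in the hunk):

    from email import message_from_string

    raw = ('From: A <a@example.com>\n'
           'To: b@example.com, C <c@example.com>\n'
           'Bcc: d@example.com\n'
           '\nbody')
    eml = message_from_string(raw)
    tos = eml.get_all('to', [])
    ccs = eml.get_all('cc', []) + eml.get_all('bcc', [])
    all_tos = []
    for x in tos + ccs:
        all_tos.extend(y.strip() for y in x.split(','))
    print all_tos  # ['b@example.com', 'C <c@example.com>', 'd@example.com']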
@@ -265,6 +265,12 @@ class Feed(object):
         if i > -1:
             self.articles[i:i+1] = []

+    def remove_article(self, article):
+        try:
+            self.articles.remove(article)
+        except ValueError:
+            pass
+
 class FeedCollection(list):

     def __init__(self, feeds):
@@ -167,9 +167,10 @@ class BasicNewsRecipe(Recipe):
     extra_css = None

     #: If True empty feeds are removed from the output.
-    #: This option has no effect if parse_index is overriden in
+    #: This option has no effect if parse_index is overridden in
     #: the sub class. It is meant only for recipes that return a list
-    #: of feeds using `feeds` or :meth:`get_feeds`.
+    #: of feeds using `feeds` or :meth:`get_feeds`. It is also used if you use
+    #: the ignore_duplicate_articles option.
     remove_empty_feeds = False

     #: List of regular expressions that determines which links to follow
@@ -321,6 +322,15 @@ class BasicNewsRecipe(Recipe):
     #: The string will be used as the disabled message
     recipe_disabled = None

+    #: Ignore duplicates of articles that are present in more than one section.
+    #: A duplicate article is an article that has the same title and/or URL.
+    #: To ignore articles with the same title, set this to:
+    #: ignore_duplicate_articles = {'title'}
+    #: To use URLs instead, set it to:
+    #: ignore_duplicate_articles = {'url'}
+    #: To match on title or URL, set it to:
+    #: ignore_duplicate_articles = {'title', 'url'}
+    ignore_duplicate_articles = None

     # See the built-in profiles for examples of these settings.
|
|||||||
url = ('file:'+pt.name) if iswindows else ('file://'+pt.name)
|
url = ('file:'+pt.name) if iswindows else ('file://'+pt.name)
|
||||||
return self._fetch_article(url, dir, f, a, num_of_feeds)
|
return self._fetch_article(url, dir, f, a, num_of_feeds)
|
||||||
|
|
||||||
|
def remove_duplicate_articles(self, feeds):
|
||||||
|
seen_keys = defaultdict(set)
|
||||||
|
remove = []
|
||||||
|
for f in feeds:
|
||||||
|
for article in f:
|
||||||
|
for key in self.ignore_duplicate_articles:
|
||||||
|
val = getattr(article, key)
|
||||||
|
seen = seen_keys[key]
|
||||||
|
if val:
|
||||||
|
if val in seen:
|
||||||
|
remove.append((f, article))
|
||||||
|
else:
|
||||||
|
seen.add(val)
|
||||||
|
|
||||||
|
for feed, article in remove:
|
||||||
|
self.log.debug('Removing duplicate article: %s from section: %s'%(
|
||||||
|
article.title, feed.title))
|
||||||
|
feed.remove_article(article)
|
||||||
|
|
||||||
|
if self.remove_empty_feeds:
|
||||||
|
feeds = [f for f in feeds if len(f) > 0]
|
||||||
|
return feeds
|
||||||
|
|
||||||
def build_index(self):
|
def build_index(self):
|
||||||
self.report_progress(0, _('Fetching feeds...'))
|
self.report_progress(0, _('Fetching feeds...'))
|
||||||
@@ -1033,6 +1065,9 @@ class BasicNewsRecipe(Recipe):
         if not feeds:
             raise ValueError('No articles found, aborting')

+        if self.ignore_duplicate_articles is not None:
+            feeds = self.remove_duplicate_articles(feeds)
+
         #feeds = FeedCollection(feeds)

         self.report_progress(0, _('Trying to download cover...'))
@@ -68,7 +68,12 @@ def serialize_collection(mapping_of_recipe_classes):
             key=lambda key: force_unicode(
                 getattr(mapping_of_recipe_classes[key], 'title', 'zzz'),
                 'utf-8')):
+        try:
             recipe = serialize_recipe(urn, mapping_of_recipe_classes[urn])
+        except:
+            import traceback
+            traceback.print_exc()
+            continue
         collection.append(recipe)
     collection.set('count', str(len(collection)))
     return etree.tostring(collection, encoding='utf-8', xml_declaration=True,