mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Improved recipe for Welt
This commit is contained in:
parent
d928b34798
commit
02e372769d
@ -15,12 +15,13 @@ class weltDe(BasicNewsRecipe):
|
|||||||
__author__ = 'Oliver Niesner'
|
__author__ = 'Oliver Niesner'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
timefmt = ' [%d %b %Y]'
|
timefmt = ' [%d %b %Y]'
|
||||||
max_articles_per_feed = 15 # reduced to this value to prevent too many articles (suggested by Gregory Riker
|
max_articles_per_feed = 15
|
||||||
|
linearize_tables = True
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_stylesheets = True
|
remove_stylesheets = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
language = 'de'
|
|
||||||
encoding = 'iso-8859-1'
|
encoding = 'iso-8859-1'
|
||||||
|
BasicNewsRecipe.summary_length = 200
|
||||||
|
|
||||||
|
|
||||||
remove_tags = [dict(id='jumplinks'),
|
remove_tags = [dict(id='jumplinks'),
|
||||||
@ -43,10 +44,14 @@ class weltDe(BasicNewsRecipe):
|
|||||||
dict(id='servicesBox'),
|
dict(id='servicesBox'),
|
||||||
dict(id='toggleAdvancedSearch'),
|
dict(id='toggleAdvancedSearch'),
|
||||||
dict(id='mainNav'),
|
dict(id='mainNav'),
|
||||||
dict(id='ratingBox5136466_1'),
|
|
||||||
dict(id='ratingBox5136466_2'),
|
|
||||||
dict(id='articleInlineMediaBox0'),
|
dict(id='articleInlineMediaBox0'),
|
||||||
dict(id='sectionSponsor'),
|
dict(id='sectionSponsor'),
|
||||||
|
dict(id='sprucharea'),
|
||||||
|
dict(id='xmsg_recommendEmail'),
|
||||||
|
dict(id='xmsg_recommendSms'),
|
||||||
|
dict(id='xmsg_comment'),
|
||||||
|
dict(id='additionalNavWrapper'),
|
||||||
|
dict(id='imagebox'),
|
||||||
#dict(id=''),
|
#dict(id=''),
|
||||||
dict(name='span'),
|
dict(name='span'),
|
||||||
dict(name='div', attrs={'class':'printURL'}),
|
dict(name='div', attrs={'class':'printURL'}),
|
||||||
@ -65,10 +70,21 @@ class weltDe(BasicNewsRecipe):
|
|||||||
dict(name='ul', attrs={'class':'optionsSubNav clear'}),
|
dict(name='ul', attrs={'class':'optionsSubNav clear'}),
|
||||||
dict(name='li', attrs={'class':'next'}),
|
dict(name='li', attrs={'class':'next'}),
|
||||||
dict(name='li', attrs={'class':'prev'}),
|
dict(name='li', attrs={'class':'prev'}),
|
||||||
|
dict(name='li', attrs={'class':'last'}),
|
||||||
|
dict(name='table', attrs={'class':'textGallery'}),
|
||||||
dict(name='li', attrs={'class':'active'})]
|
dict(name='li', attrs={'class':'active'})]
|
||||||
|
|
||||||
remove_tags_after = [dict(id='tw_link_widget')]
|
remove_tags_after = [dict(id='tw_link_widget')]
|
||||||
|
|
||||||
|
extra_css = '''
|
||||||
|
h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #003399;}
|
||||||
|
a{font-family:Arial,Helvetica,sans-serif; font-size: x-small; font-style:italic;}
|
||||||
|
.dachzeile p{font-family:Arial,Helvetica,sans-serif; font-size: x-small; }
|
||||||
|
h1{ font-family:Arial,Helvetica,sans-serif; font-size:x-large; font-weight:bold;}
|
||||||
|
.artikelTeaser{font-family:Arial,Helvetica,sans-serif; font-size: x-small; font-weight:bold; }
|
||||||
|
body{font-family:Arial,Helvetica,sans-serif; }
|
||||||
|
.photo {font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #666666;} '''
|
||||||
|
|
||||||
feeds = [ ('Politik', 'http://welt.de/politik/?service=Rss'),
|
feeds = [ ('Politik', 'http://welt.de/politik/?service=Rss'),
|
||||||
('Deutsche Dinge', 'http://www.welt.de/deutsche-dinge/?service=Rss'),
|
('Deutsche Dinge', 'http://www.welt.de/deutsche-dinge/?service=Rss'),
|
||||||
('Wirtschaft', 'http://welt.de/wirtschaft/?service=Rss'),
|
('Wirtschaft', 'http://welt.de/wirtschaft/?service=Rss'),
|
||||||
|
@ -78,7 +78,7 @@ class HorizontalBox(object):
|
|||||||
def append(self, t):
|
def append(self, t):
|
||||||
self.texts.append(t)
|
self.texts.append(t)
|
||||||
|
|
||||||
def sort(self):
|
def sort(self, left_margin, right_margin):
|
||||||
self.texts.sort(cmp=lambda x,y: cmp(x.left, y.left))
|
self.texts.sort(cmp=lambda x,y: cmp(x.left, y.left))
|
||||||
self.top, self.bottom = sys.maxint, 0
|
self.top, self.bottom = sys.maxint, 0
|
||||||
for t in self.texts:
|
for t in self.texts:
|
||||||
@ -86,6 +86,27 @@ class HorizontalBox(object):
|
|||||||
self.bottom = max(self.bottom, t.bottom)
|
self.bottom = max(self.bottom, t.bottom)
|
||||||
self.left = self.texts[0].left
|
self.left = self.texts[0].left
|
||||||
self.right = self.texts[-1].right
|
self.right = self.texts[-1].right
|
||||||
|
self.gaps = []
|
||||||
|
for i, t in enumerate(self.texts[1:]):
|
||||||
|
gap = Interval(self.texts[i].right, t.left)
|
||||||
|
if gap.width > 3:
|
||||||
|
self.gaps.append(gap)
|
||||||
|
left = Interval(left_margin, self.texts[0].left)
|
||||||
|
if left.width > 3:
|
||||||
|
self.gaps.insert(0, left)
|
||||||
|
right = Interval(self.texts[-1].right, right_margin)
|
||||||
|
if right.width > 3:
|
||||||
|
self.gaps.append(right)
|
||||||
|
|
||||||
|
def has_intersection_with(self, gap):
|
||||||
|
for g in self.gaps:
|
||||||
|
if g.intersection(gap):
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
def identify_columns(self, column_gaps):
|
||||||
|
self.number_of_columns = len(column_gaps) + 1
|
||||||
|
|
||||||
|
|
||||||
class Page(object):
|
class Page(object):
|
||||||
|
|
||||||
@ -138,19 +159,24 @@ class Page(object):
|
|||||||
|
|
||||||
|
|
||||||
for hb in self.horizontal_boxes:
|
for hb in self.horizontal_boxes:
|
||||||
hb.sort()
|
hb.sort(self.left_margin, self.right_margin)
|
||||||
|
|
||||||
self.horizontal_boxes.sort(cmp=lambda x,y: cmp(x.bottom, y.bottom))
|
self.horizontal_boxes.sort(cmp=lambda x,y: cmp(x.bottom, y.bottom))
|
||||||
|
|
||||||
def identify_columns(self):
|
def identify_columns(self):
|
||||||
|
|
||||||
def neighborhood(i):
|
def neighborhood(i):
|
||||||
if i == 0:
|
if i == len(self.horizontal_boxes)-1:
|
||||||
return self.horizontal_boxes[1:3]
|
return self.horizontal_boxes[i-2:i]
|
||||||
|
if i == len(self.horizontal_boxes)-2:
|
||||||
return (self.horizontal_boxes[i-1], self.horizontal_boxes[i+1])
|
return (self.horizontal_boxes[i-1], self.horizontal_boxes[i+1])
|
||||||
|
return self.horizontal_boxes[i+1], self.horizontal_boxes[i+2]
|
||||||
|
|
||||||
for i, hbox in enumerate(self.horizontal_boxes):
|
for i, hbox in enumerate(self.horizontal_boxes):
|
||||||
pass
|
n1, n2 = neighborhood(i)
|
||||||
|
for gap in hbox.gaps:
|
||||||
|
gap.is_column_gap = n1.has_intersection_with(gap) and \
|
||||||
|
n2.has_intersection_with(gap)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user