mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Improved recipe for Welt
This commit is contained in:
parent
d928b34798
commit
02e372769d
@ -15,12 +15,13 @@ class weltDe(BasicNewsRecipe):
|
||||
__author__ = 'Oliver Niesner'
|
||||
use_embedded_content = False
|
||||
timefmt = ' [%d %b %Y]'
|
||||
max_articles_per_feed = 15 # reduced to this value to prevent too many articles (suggested by Gregory Riker
|
||||
max_articles_per_feed = 15
|
||||
linearize_tables = True
|
||||
no_stylesheets = True
|
||||
remove_stylesheets = True
|
||||
remove_javascript = True
|
||||
language = 'de'
|
||||
encoding = 'iso-8859-1'
|
||||
BasicNewsRecipe.summary_length = 200
|
||||
|
||||
|
||||
remove_tags = [dict(id='jumplinks'),
|
||||
@ -43,10 +44,14 @@ class weltDe(BasicNewsRecipe):
|
||||
dict(id='servicesBox'),
|
||||
dict(id='toggleAdvancedSearch'),
|
||||
dict(id='mainNav'),
|
||||
dict(id='ratingBox5136466_1'),
|
||||
dict(id='ratingBox5136466_2'),
|
||||
dict(id='articleInlineMediaBox0'),
|
||||
dict(id='sectionSponsor'),
|
||||
dict(id='sprucharea'),
|
||||
dict(id='xmsg_recommendEmail'),
|
||||
dict(id='xmsg_recommendSms'),
|
||||
dict(id='xmsg_comment'),
|
||||
dict(id='additionalNavWrapper'),
|
||||
dict(id='imagebox'),
|
||||
#dict(id=''),
|
||||
dict(name='span'),
|
||||
dict(name='div', attrs={'class':'printURL'}),
|
||||
@ -65,10 +70,21 @@ class weltDe(BasicNewsRecipe):
|
||||
dict(name='ul', attrs={'class':'optionsSubNav clear'}),
|
||||
dict(name='li', attrs={'class':'next'}),
|
||||
dict(name='li', attrs={'class':'prev'}),
|
||||
dict(name='li', attrs={'class':'last'}),
|
||||
dict(name='table', attrs={'class':'textGallery'}),
|
||||
dict(name='li', attrs={'class':'active'})]
|
||||
|
||||
remove_tags_after = [dict(id='tw_link_widget')]
|
||||
|
||||
extra_css = '''
|
||||
h2{font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #003399;}
|
||||
a{font-family:Arial,Helvetica,sans-serif; font-size: x-small; font-style:italic;}
|
||||
.dachzeile p{font-family:Arial,Helvetica,sans-serif; font-size: x-small; }
|
||||
h1{ font-family:Arial,Helvetica,sans-serif; font-size:x-large; font-weight:bold;}
|
||||
.artikelTeaser{font-family:Arial,Helvetica,sans-serif; font-size: x-small; font-weight:bold; }
|
||||
body{font-family:Arial,Helvetica,sans-serif; }
|
||||
.photo {font-family:Arial,Helvetica,sans-serif; font-size: x-small; color: #666666;} '''
|
||||
|
||||
feeds = [ ('Politik', 'http://welt.de/politik/?service=Rss'),
|
||||
('Deutsche Dinge', 'http://www.welt.de/deutsche-dinge/?service=Rss'),
|
||||
('Wirtschaft', 'http://welt.de/wirtschaft/?service=Rss'),
|
||||
|
@ -78,7 +78,7 @@ class HorizontalBox(object):
|
||||
def append(self, t):
    """Add the text object ``t`` to the end of this box's ``texts``."""
    collected = self.texts
    collected.append(t)
|
||||
|
||||
def sort(self):
|
||||
def sort(self, left_margin, right_margin):
|
||||
self.texts.sort(cmp=lambda x,y: cmp(x.left, y.left))
|
||||
self.top, self.bottom = sys.maxint, 0
|
||||
for t in self.texts:
|
||||
@ -86,6 +86,27 @@ class HorizontalBox(object):
|
||||
self.bottom = max(self.bottom, t.bottom)
|
||||
self.left = self.texts[0].left
|
||||
self.right = self.texts[-1].right
|
||||
self.gaps = []
|
||||
for i, t in enumerate(self.texts[1:]):
|
||||
gap = Interval(self.texts[i].right, t.left)
|
||||
if gap.width > 3:
|
||||
self.gaps.append(gap)
|
||||
left = Interval(left_margin, self.texts[0].left)
|
||||
if left.width > 3:
|
||||
self.gaps.insert(0, left)
|
||||
right = Interval(self.texts[-1].right, right_margin)
|
||||
if right.width > 3:
|
||||
self.gaps.append(right)
|
||||
|
||||
def has_intersection_with(self, gap):
    """Tell whether ``gap`` intersects any of the gaps stored on this box.

    Every entry of ``self.gaps`` is asked for its ``intersection`` with
    ``gap``; the search stops at the first truthy result.

    Returns True when such an entry exists, False otherwise (including
    when ``self.gaps`` is empty).
    """
    return any(own.intersection(gap) for own in self.gaps)
|
||||
|
||||
def identify_columns(self, column_gaps):
    """Store the column count implied by ``column_gaps`` on this box.

    ``number_of_columns`` is set to one more than the number of entries
    in ``column_gaps``, so an empty sequence yields a single column.
    """
    gap_count = len(column_gaps)
    self.number_of_columns = gap_count + 1
|
||||
|
||||
|
||||
class Page(object):
|
||||
|
||||
@ -138,19 +159,24 @@ class Page(object):
|
||||
|
||||
|
||||
for hb in self.horizontal_boxes:
|
||||
hb.sort()
|
||||
hb.sort(self.left_margin, self.right_margin)
|
||||
|
||||
self.horizontal_boxes.sort(cmp=lambda x,y: cmp(x.bottom, y.bottom))
|
||||
|
||||
def identify_columns(self):
|
||||
|
||||
def neighborhood(i):
|
||||
if i == 0:
|
||||
return self.horizontal_boxes[1:3]
|
||||
return (self.horizontal_boxes[i-1], self.horizontal_boxes[i+1])
|
||||
if i == len(self.horizontal_boxes)-1:
|
||||
return self.horizontal_boxes[i-2:i]
|
||||
if i == len(self.horizontal_boxes)-2:
|
||||
return (self.horizontal_boxes[i-1], self.horizontal_boxes[i+1])
|
||||
return self.horizontal_boxes[i+1], self.horizontal_boxes[i+2]
|
||||
|
||||
for i, hbox in enumerate(self.horizontal_boxes):
|
||||
pass
|
||||
n1, n2 = neighborhood(i)
|
||||
for gap in hbox.gaps:
|
||||
gap.is_column_gap = n1.has_intersection_with(gap) and \
|
||||
n2.has_intersection_with(gap)
|
||||
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user