mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #4779 (Wall Street Journal (Free Content))
This commit is contained in:
parent
3df472ef71
commit
9ea276be20
@ -1,22 +0,0 @@
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
|
||||||
|
|
||||||
class The_Gazette(BasicNewsRecipe):
|
|
||||||
|
|
||||||
cover_url = 'file:///D:/Documents/Pictures/Covers/The_Gazette.jpg'
|
|
||||||
title = u'The Gazette'
|
|
||||||
__author__ = 'Jerry Clapperton'
|
|
||||||
description = 'Montreal news in English'
|
|
||||||
language = 'en_CA'
|
|
||||||
|
|
||||||
oldest_article = 7
|
|
||||||
max_articles_per_feed = 20
|
|
||||||
use_embedded_content = False
|
|
||||||
remove_javascript = True
|
|
||||||
no_stylesheets = True
|
|
||||||
encoding = 'utf-8'
|
|
||||||
|
|
||||||
keep_only_tags = [dict(name='div', attrs={'id':['storyheader','page1']})]
|
|
||||||
|
|
||||||
extra_css = '.headline {font-size: x-large;} \n .fact {padding-top: 10pt}'
|
|
||||||
|
|
||||||
feeds = [(u'News', u'http://feeds.canada.com/canwest/F297'), (u'Opinion', u'http://feeds.canada.com/canwest/F7383'), (u'Arts', u'http://feeds.canada.com/canwest/F7366'), (u'Life', u'http://rss.canada.com/get/?F6934'), (u'Business', u'http://feeds.canada.com/canwest/F6939'), (u'Travel', u'http://rss.canada.com/get/?F6938'), (u'Health', u'http://feeds.canada.com/canwest/F7397'), (u'Technology', u'http://feeds.canada.com/canwest/F7411')]
|
|
@ -215,7 +215,7 @@ class WSJ(BasicNewsRecipe):
|
|||||||
# first, check if there is an h3 tag which provides a section name
|
# first, check if there is an h3 tag which provides a section name
|
||||||
stag = divtag.find('h3')
|
stag = divtag.find('h3')
|
||||||
if stag:
|
if stag:
|
||||||
if stag.parent['class'] == 'dynamic':
|
if stag.parent.get('class', '') == 'dynamic':
|
||||||
# a carousel of articles is too complex to extract a section name
|
# a carousel of articles is too complex to extract a section name
|
||||||
# for each article, so we'll just call the section "Carousel"
|
# for each article, so we'll just call the section "Carousel"
|
||||||
section_name = 'Carousel'
|
section_name = 'Carousel'
|
||||||
|
@ -262,7 +262,6 @@ class Region(object):
|
|||||||
max_lines = max(max_lines, len(c))
|
max_lines = max(max_lines, len(c))
|
||||||
return max_lines
|
return max_lines
|
||||||
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def is_small(self):
|
def is_small(self):
|
||||||
return self.line_count < 3
|
return self.line_count < 3
|
||||||
@ -438,9 +437,8 @@ class Page(object):
|
|||||||
# absorb into a neighboring region (prefer the one with number of cols
|
# absorb into a neighboring region (prefer the one with number of cols
|
||||||
# closer to the avg number of cols in the set, if equal use larger
|
# closer to the avg number of cols in the set, if equal use larger
|
||||||
# region)
|
# region)
|
||||||
# merge contiguous regions that can contain each other
|
|
||||||
'''absorbed = set([])
|
|
||||||
found = True
|
found = True
|
||||||
|
absorbed = set([])
|
||||||
while found:
|
while found:
|
||||||
found = False
|
found = False
|
||||||
for i, region in enumerate(self.regions):
|
for i, region in enumerate(self.regions):
|
||||||
@ -452,10 +450,33 @@ class Page(object):
|
|||||||
regions.append(self.regions[j])
|
regions.append(self.regions[j])
|
||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
prev = None if i == 0 else i-1
|
prev_region = None if i == 0 else i-1
|
||||||
next = j if self.regions[j] not in regions else None
|
next_region = j if self.regions[j] not in regions else None
|
||||||
'''
|
if prev_region is None and next_region is not None:
|
||||||
pass
|
absorb_into = next_region
|
||||||
|
elif next_region is None and prev_region is not None:
|
||||||
|
absorb_into = prev_region
|
||||||
|
elif prev_region is None and next_region is None:
|
||||||
|
if len(regions) > 1:
|
||||||
|
absorb_into = regions[0]
|
||||||
|
regions = regions[1:]
|
||||||
|
else:
|
||||||
|
absorb_into = None
|
||||||
|
else:
|
||||||
|
absorb_into = prev_region
|
||||||
|
if next_region.line_count >= prev_region.line_count:
|
||||||
|
avg_column_count = sum([len(r.columns) for r in
|
||||||
|
regions])/float(len(regions))
|
||||||
|
if next_region.line_count > prev_region.line_count \
|
||||||
|
or abs(avg_column_count - len(prev_region.columns)) \
|
||||||
|
> abs(avg_column_count - len(next_region.columns)):
|
||||||
|
absorb_into = next_region
|
||||||
|
if absorb_into is not None:
|
||||||
|
absorb_into.absorb_region(regions)
|
||||||
|
absorbed.update(regions)
|
||||||
|
i = j
|
||||||
|
for region in absorbed:
|
||||||
|
self.regions.remove(region)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user