mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Greensboro news and record by Walt Anthony
This commit is contained in:
parent
20d7986126
commit
ee5a5789dd
BIN
resources/images/news/greensboro_news_and_record.png
Normal file
BIN
resources/images/news/greensboro_news_and_record.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 480 B |
54
resources/recipes/greensboro_news_and_record.recipe
Normal file
54
resources/recipes/greensboro_news_and_record.recipe
Normal file
@ -0,0 +1,54 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Walt Anthony <workshop.northpole at gmail.com>'
|
||||
'''
|
||||
www.news-record.com
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class NewsandRecord(BasicNewsRecipe):
    '''
    Recipe settings for the Greensboro News & Record (www.news-record.com).

    The class only declares class-level attributes; it defines no methods
    of its own.
    '''

    # --- Identification and descriptive metadata ---------------------------
    title = u'Greensboro News & Record'
    __author__ = 'Walt Anthony'
    description = "News from Greensboro, North Carolina"
    publisher = 'News & Record and Landmark Media Enterprises, LLC'
    category = 'news, USA'
    language = 'en'

    # --- Download limits and text handling ---------------------------------
    oldest_article = 3  # days
    max_articles_per_feed = 25
    summary_length = 150
    encoding = 'utf-8'
    remove_javascript = True
    no_stylesheets = True

    # Re-use the metadata declared above so the two can never drift apart.
    conversion_options = dict(
        comment=description,
        tags=category,
        publisher=publisher,
        language=language,
    )

    # --- Article clean-up selectors ----------------------------------------
    # The headline <h3> and the content-body <div> are the two page
    # landmarks named by the before/after selectors.
    remove_tags_before = {'name': 'h3', 'attrs': {'class': 'nrcTxt_headline'}}
    remove_tags_after = {'name': 'div', 'attrs': {'id': 'nrcBlk_ContentBody'}}

    remove_tags = [
        {'name': 'iframe'},
        {'name': ['notags', 'embed', 'object', 'link', 'img']},
    ]

    # --- (section title, feed URL) pairs -----------------------------------
    feeds = [
        ('News', 'http://www.news-record.com/news/archive/feed'),
        ('Greensboro News', 'http://www.news-record.com/news/greensboro/feed'),
        ('Education', 'http://www.news-record.com/news/education/feed'),
        ('Government', 'http://www.news-record.com/news/government/feed'),
        ('College Sports', 'http://www.news-record.com/sports/college/feed'),
        ('Sports Extra', 'http://www.news-record.com/blog/sportsextra/feed'),
        ('Life', 'http://www.news-record.com/life/top/feed'),
        ('NASCAR', 'http://www.news-record.com/sports/nascar/top/feed'),
        ('Editorials', 'http://www.news-record.com/opinion/editorials/feed'),
        ('Letters to the Editor', 'http://www.news-record.com/opinion/letters/feed'),
    ]
|
||||
|
@ -146,12 +146,14 @@ class Region(object):
|
||||
self.columns = []
|
||||
self.top = self.bottom = self.left = self.right = self.width = self.height = 0
|
||||
|
||||
def add_columns(self, columns):
|
||||
def add(self, columns):
|
||||
if not self.columns:
|
||||
for x in sorted(columns, cmp=lambda x,y: cmp(x.left, y.left)):
|
||||
self.columns.append(x)
|
||||
else:
|
||||
pass
|
||||
for i in range(len(columns)):
|
||||
for elem in columns[i]:
|
||||
self.columns[i].add(elem)
|
||||
|
||||
def contains(self, columns):
|
||||
if not self.columns:
|
||||
@ -168,6 +170,11 @@ class Region(object):
|
||||
return False
|
||||
return True
|
||||
|
||||
@property
def is_empty(self):
    'True when self.elements has a length of zero'
    return not len(self.elements)
|
||||
|
||||
|
||||
class Page(object):
|
||||
|
||||
# Fraction of a character width that two strings have to be apart,
|
||||
@ -242,19 +249,25 @@ class Page(object):
|
||||
self.texts.remove(match)
|
||||
|
||||
def first_pass(self):
|
||||
'Sort page into regions and columns'
|
||||
self.regions = []
|
||||
if not self.elements:
|
||||
return
|
||||
for i, x in enumerate(self.elements):
|
||||
x.idx = i
|
||||
self.current_region = None
|
||||
current_region = Region()
|
||||
processed = set([])
|
||||
for x in self.elements:
|
||||
if x in processed: continue
|
||||
elems = set(self.find_elements_in_row_of(x))
|
||||
columns = self.sort_into_columns(x, elems)
|
||||
processed.update(elems)
|
||||
columns
|
||||
if not current_region.contains(columns):
|
||||
self.regions.append(self.current_region)
|
||||
current_region = Region()
|
||||
current_region.add(columns)
|
||||
if not self.current_region.is_empty():
|
||||
self.regions.append(current_region)
|
||||
|
||||
def sort_into_columns(self, elem, neighbors):
|
||||
columns = [Column()]
|
||||
|
Loading…
x
Reference in New Issue
Block a user