mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge from trunk
This commit is contained in:
commit
468dcea634
BIN
resources/images/news/akter.png
Normal file
BIN
resources/images/news/akter.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 429 B |
78
resources/recipes/akter.recipe
Normal file
78
resources/recipes/akter.recipe
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||||
|
'''
|
||||||
|
akter.co.rs
|
||||||
|
'''
|
||||||
|
|
||||||
|
import re
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
class Akter(BasicNewsRecipe):
    """Recipe for the Serbian weekly magazine AKTER (akter.co.rs).

    The site is scraped directly: each entry in ``feeds`` is an HTML section
    listing page rather than an RSS feed, and ``parse_index`` turns those
    pages into article lists.
    """

    title                 = 'AKTER'
    __author__            = 'Darko Miletic'
    description           = 'AKTER - nedeljni politicki magazin savremene Srbije'
    publisher             = 'Akter Media Group d.o.o.'
    category              = 'vesti, online vesti, najnovije vesti, politika, sport, ekonomija, biznis, finansije, berza, kultura, zivot, putovanja, auto, automobili, tehnologija, politicki magazin, dogadjaji, desavanja, lifestyle, zdravlje, zdravstvo, vest, novine, nedeljnik, srbija, novi sad, vojvodina, svet, drustvo, zabava, republika srpska, beograd, intervju, komentar, reportaza, arhiva vesti, news, serbia, politics'
    oldest_article        = 8
    max_articles_per_feed = 100
    no_stylesheets        = False
    use_embedded_content  = False
    encoding              = 'utf-8'
    masthead_url          = 'http://www.akter.co.rs/templates/gk_thenews2/images/style2/logo.png'
    language              = 'sr'
    publication_type      = 'magazine'
    remove_empty_feeds    = True
    # Site root; prepended to the relative article links found on the
    # section listing pages.
    PREFIX                = 'http://www.akter.co.rs'
    # The padding shorthand is space-separated: a comma between the values
    # makes the declaration invalid.
    extra_css             = """ @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
                                @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
                                .article_description,body,.lokacija{font-family: Arial,Helvetica,sans1,sans-serif}
                                .color-2{display:block; margin-bottom: 10px; padding: 5px 10px;
                                border-left: 1px solid #D00000; color: #D00000}
                                img{margin-bottom: 0.8em} """

    conversion_options = {
                          'comment'          : description
                        , 'tags'             : category
                        , 'publisher'        : publisher
                        , 'language'         : language
                        , 'linearize_tables' : True
                        }

    # Replace U+0110 (D with stroke) by the look-alike U+00D0 (Eth).
    # NOTE(review): presumably a workaround for reader fonts that lack the
    # U+0110 glyph -- confirm before removing.
    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

    # (section title, URL of the HTML listing page for that section)
    feeds = [
              (u'Politika'  , u'http://www.akter.co.rs/index.php/politikaprint.html' )
             ,(u'Ekonomija' , u'http://www.akter.co.rs/index.php/ekonomijaprint.html')
             ,(u'Life&Style', u'http://www.akter.co.rs/index.php/lsprint.html'       )
             ,(u'Sport'     , u'http://www.akter.co.rs/index.php/sportprint.html'    )
            ]

    def preprocess_html(self, soup):
        """Strip every inline ``style`` attribute from ``soup``.

        Returns whatever ``self.adeify_images(soup)`` returns.
        """
        for item in soup.findAll(style=True):
            del item['style']
        return self.adeify_images(soup)

    def print_version(self, url):
        """Return ``url`` with the site's print-view query string appended."""
        return url + '?tmpl=component&print=1&page='

    def parse_index(self):
        """Build the article index from the HTML section listing pages.

        For each ``(title, url)`` pair from ``self.get_feeds()`` the listing
        page is fetched and every element with class ``sectiontableentry1``
        or ``sectiontableentry2`` contributes one article, taken from the
        first anchor inside it that has an ``href``.

        Returns a list of ``(feed title, articles)`` tuples, where each
        article is a dict with the keys ``title``, ``date``, ``url`` and
        ``description`` (``date`` and ``description`` are always empty).
        """
        totalfeeds = []
        for feedtitle, feedurl in self.get_feeds():
            self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle or feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
            for item in soup.findAll(attrs={'class':['sectiontableentry1','sectiontableentry2']}):
                link = item.find('a', href=True)
                if link is None:
                    # A row without a usable link must not abort the
                    # whole download; just skip it.
                    continue
                href = link['href']
                if href.startswith(('http://', 'https://')):
                    # Already absolute: prefixing it would corrupt the URL.
                    url = href
                else:
                    url = self.PREFIX + href
                title = self.tag_to_string(link)
                articles.append({
                                  'title'      :title
                                 ,'date'       :''
                                 ,'url'        :url
                                 ,'description':''
                                })
            totalfeeds.append((feedtitle, articles))
        return totalfeeds
|
||||||
|
|
@ -3,9 +3,8 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import string
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
from calibre import strftime
|
|
||||||
|
|
||||||
# http://online.wsj.com/page/us_in_todays_paper.html
|
# http://online.wsj.com/page/us_in_todays_paper.html
|
||||||
|
|
||||||
@ -72,56 +71,61 @@ class WallStreetJournal(BasicNewsRecipe):
|
|||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
soup = self.wsj_get_index()
|
soup = self.wsj_get_index()
|
||||||
|
|
||||||
year = strftime('%Y')
|
date = soup.find('span', attrs={'class':'date-date'})
|
||||||
for x in soup.findAll('td', height='25', attrs={'class':'b14'}):
|
if date is not None:
|
||||||
txt = self.tag_to_string(x).strip()
|
self.timefmt = ' [%s]'%self.tag_to_string(date)
|
||||||
txt = txt.replace(u'\xa0', ' ')
|
|
||||||
txt = txt.encode('ascii', 'ignore')
|
|
||||||
if year in txt:
|
|
||||||
self.timefmt = ' [%s]'%txt
|
|
||||||
break
|
|
||||||
|
|
||||||
left_column = soup.find(
|
sections = {}
|
||||||
text=lambda t: 'begin ITP Left Column' in str(t))
|
sec_order = []
|
||||||
|
for a in soup.findAll('a', attrs={'class':'mjLinkItem'}, href=True):
|
||||||
table = left_column.findNext('table')
|
container = a.findParent(['li', 'div'])
|
||||||
|
if container.name == 'div':
|
||||||
current_section = None
|
section = 'Page One'
|
||||||
current_articles = []
|
else:
|
||||||
feeds = []
|
section = ''
|
||||||
for x in table.findAllNext(True):
|
sec = container.find('a', href=lambda x: x and '/search?' in x)
|
||||||
if x.name == 'td' and x.get('class', None) == 'b13':
|
if sec is not None:
|
||||||
if current_articles and current_section:
|
section = self.tag_to_string(sec).strip()
|
||||||
feeds.append((current_section, current_articles))
|
if not section:
|
||||||
current_section = self.tag_to_string(x.a).strip()
|
h = container.find(['h1','h2','h3','h4','h5','h6'])
|
||||||
current_articles = []
|
section = self.tag_to_string(h)
|
||||||
self.log('\tProcessing section:', current_section)
|
section = string.capitalize(section).replace('U.s.', 'U.S.')
|
||||||
if current_section is not None and x.name == 'a' and \
|
if section not in sections:
|
||||||
x.get('class', None) == 'bold80':
|
sections[section] = []
|
||||||
title = self.tag_to_string(x)
|
sec_order.append(section)
|
||||||
url = x.get('href', False)
|
meta = a.find(attrs={'class':'meta_sectionName'})
|
||||||
if not url or not title:
|
if meta is not None:
|
||||||
continue
|
meta.extract()
|
||||||
url = url.partition('#')[0]
|
title = self.tag_to_string(a).strip() + ' [%s]'%self.tag_to_string(meta)
|
||||||
|
url = 'http://online.wsj.com'+a['href']
|
||||||
desc = ''
|
desc = ''
|
||||||
d = x.findNextSibling(True)
|
p = container.find('p')
|
||||||
if d is not None and d.get('class', None) == 'arialResize':
|
if p is not None:
|
||||||
desc = self.tag_to_string(d)
|
desc = self.tag_to_string(p)
|
||||||
desc = desc.partition(u'\u2022')[0]
|
|
||||||
self.log('\t\tFound article:', title)
|
sections[section].append({'title':title, 'url':url,
|
||||||
self.log('\t\t\t', url)
|
|
||||||
if url.startswith('/'):
|
|
||||||
url = 'http://online.wsj.com'+url
|
|
||||||
if desc:
|
|
||||||
self.log('\t\t\t', desc)
|
|
||||||
current_articles.append({'title': title, 'url':url,
|
|
||||||
'description':desc, 'date':''})
|
'description':desc, 'date':''})
|
||||||
|
|
||||||
if current_articles and current_section:
|
self.log('Found article:', title)
|
||||||
feeds.append((current_section, current_articles))
|
|
||||||
|
|
||||||
|
a.extract()
|
||||||
|
for a in container.findAll('a', href=lambda x: x and '/article/'
|
||||||
|
in x):
|
||||||
|
url = a['href']
|
||||||
|
if not url.startswith('http:'):
|
||||||
|
url = 'http://online.wsj.com'+url
|
||||||
|
title = self.tag_to_string(a).strip()
|
||||||
|
if not title or title.startswith('['): continue
|
||||||
|
if title:
|
||||||
|
sections[section].append({'title':self.tag_to_string(a),
|
||||||
|
'url':url, 'description':'', 'date':''})
|
||||||
|
self.log('\tFound related:', title)
|
||||||
|
|
||||||
|
|
||||||
|
feeds = [(sec, sections[sec]) for sec in sec_order]
|
||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
|
|
||||||
def cleanup(self):
|
def cleanup(self):
|
||||||
self.browser.open('http://online.wsj.com/logout?url=http://online.wsj.com')
|
self.browser.open('http://online.wsj.com/logout?url=http://online.wsj.com')
|
||||||
|
|
||||||
|
@ -61,6 +61,7 @@ class FormatState(object):
|
|||||||
self.italic = False
|
self.italic = False
|
||||||
self.bold = False
|
self.bold = False
|
||||||
self.strikethrough = False
|
self.strikethrough = False
|
||||||
|
self.underline = False
|
||||||
self.preserve = False
|
self.preserve = False
|
||||||
self.family = 'serif'
|
self.family = 'serif'
|
||||||
self.bgcolor = 'transparent'
|
self.bgcolor = 'transparent'
|
||||||
@ -79,7 +80,8 @@ class FormatState(object):
|
|||||||
and self.family == other.family \
|
and self.family == other.family \
|
||||||
and self.bgcolor == other.bgcolor \
|
and self.bgcolor == other.bgcolor \
|
||||||
and self.fgcolor == other.fgcolor \
|
and self.fgcolor == other.fgcolor \
|
||||||
and self.strikethrough == other.strikethrough
|
and self.strikethrough == other.strikethrough \
|
||||||
|
and self.underline == other.underline
|
||||||
|
|
||||||
def __ne__(self, other):
|
def __ne__(self, other):
|
||||||
return not self.__eq__(other)
|
return not self.__eq__(other)
|
||||||
@ -251,6 +253,8 @@ class MobiMLizer(object):
|
|||||||
color=unicode(istate.fgcolor))
|
color=unicode(istate.fgcolor))
|
||||||
if istate.strikethrough:
|
if istate.strikethrough:
|
||||||
inline = etree.SubElement(inline, XHTML('s'))
|
inline = etree.SubElement(inline, XHTML('s'))
|
||||||
|
if istate.underline:
|
||||||
|
inline = etree.SubElement(inline, XHTML('u'))
|
||||||
bstate.inline = inline
|
bstate.inline = inline
|
||||||
bstate.istate = istate
|
bstate.istate = istate
|
||||||
inline = bstate.inline
|
inline = bstate.inline
|
||||||
@ -330,6 +334,7 @@ class MobiMLizer(object):
|
|||||||
istate.bgcolor = style['background-color']
|
istate.bgcolor = style['background-color']
|
||||||
istate.fgcolor = style['color']
|
istate.fgcolor = style['color']
|
||||||
istate.strikethrough = style['text-decoration'] == 'line-through'
|
istate.strikethrough = style['text-decoration'] == 'line-through'
|
||||||
|
istate.underline = style['text-decoration'] == 'underline'
|
||||||
if 'monospace' in style['font-family']:
|
if 'monospace' in style['font-family']:
|
||||||
istate.family = 'monospace'
|
istate.family = 'monospace'
|
||||||
elif 'sans-serif' in style['font-family']:
|
elif 'sans-serif' in style['font-family']:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user