mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Sync to trunk.
This commit is contained in:
commit
cc2155e671
4210
Changelog.old.yaml
4210
Changelog.old.yaml
File diff suppressed because it is too large
Load Diff
4468
Changelog.yaml
4468
Changelog.yaml
File diff suppressed because it is too large
Load Diff
@ -10,11 +10,11 @@ class Alternet(BasicNewsRecipe):
|
||||
category = 'News, Magazine'
|
||||
description = 'News magazine and online community'
|
||||
feeds = [
|
||||
(u'Front Page', u'http://feeds.feedblitz.com/alternet'),
|
||||
(u'Breaking News', u'http://feeds.feedblitz.com/alternet_breaking_news'),
|
||||
(u'Top Ten Campaigns', u'http://feeds.feedblitz.com/alternet_top_10_campaigns'),
|
||||
(u'Special Coverage Areas', u'http://feeds.feedblitz.com/alternet_coverage')
|
||||
]
|
||||
(u'Front Page', u'http://feeds.feedblitz.com/alternet'),
|
||||
(u'Breaking News', u'http://feeds.feedblitz.com/alternet_breaking_news'),
|
||||
(u'Top Ten Campaigns', u'http://feeds.feedblitz.com/alternet_top_10_campaigns'),
|
||||
(u'Special Coverage Areas', u'http://feeds.feedblitz.com/alternet_coverage')
|
||||
]
|
||||
remove_attributes = ['width', 'align','cellspacing']
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
@ -36,3 +36,5 @@ class Alternet(BasicNewsRecipe):
|
||||
self.temp_files[-1].write(html)
|
||||
self.temp_files[-1].close()
|
||||
return self.temp_files[-1].name
|
||||
|
||||
conversion_options = {'linearize_tables': True}
|
||||
|
@ -11,7 +11,6 @@ class AssociatedPress(BasicNewsRecipe):
|
||||
language = 'en'
|
||||
no_stylesheets = True
|
||||
max_articles_per_feed = 15
|
||||
html2lrf_options = ['--force-page-break-before-tag="chapter"']
|
||||
|
||||
|
||||
preprocess_regexps = [ (re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in
|
||||
|
44
recipes/birmingham_post.recipe
Normal file
44
recipes/birmingham_post.recipe
Normal file
@ -0,0 +1,44 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
title = u'Birmingham post'
|
||||
description = 'News for Birmingham UK'
|
||||
timefmt = ''
|
||||
__author__ = 'Dave Asbury'
|
||||
cover_url = 'http://1.bp.blogspot.com/_GwWyq5eGw9M/S9BHPHxW55I/AAAAAAAAB6Q/iGCWl0egGzg/s320/Birmingham+post+Lite+front.JPG'
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 20
|
||||
remove_empty_feeds = True
|
||||
remove_javascript = True
|
||||
auto_cleanup = True
|
||||
language = 'en_GB'
|
||||
|
||||
|
||||
masthead_url = 'http://www.pressgazette.co.uk/Pictures/web/t/c/g/birmingham_post.jpg'
|
||||
|
||||
|
||||
keep_only_tags = [
|
||||
#dict(name='h1',attrs={'id' : 'article-headline'}),
|
||||
#dict(attrs={'class':['article-meta-author','article-meta-date','article main','art-o art-align-center otm-1 ']}),
|
||||
#dict(name='p')
|
||||
#dict(attrs={'id' : 'three-col'})
|
||||
]
|
||||
remove_tags = [
|
||||
# dict(name='div',attrs={'class' : 'span-33 last header-links'})
|
||||
|
||||
]
|
||||
feeds = [
|
||||
#(u'News',u'http://www.birminghampost.net/news/rss.xml'),
|
||||
(u'Local News', u'http://www.birminghampost.net/news/west-midlands-news/rss.xml'),
|
||||
(u'UK News', u'http://www.birminghampost.net/news/uk-news/rss.xml'),
|
||||
(u'Sports',u'http://www.birminghampost.net/midlands-birmingham-sport/rss.xml'),
|
||||
(u'Bloggs & Comments',u'http://www.birminghampost.net/comment/rss.xml')
|
||||
|
||||
]
|
||||
extra_css = '''
|
||||
body {font: sans-serif medium;}'
|
||||
h1 {text-align : center; font-family:Arial,Helvetica,sans-serif; font-size:20px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold;}
|
||||
h2 {text-align : center;color:#4D4D4D;font-family:Arial,Helvetica,sans-serif; font-size:15px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:bold; }
|
||||
span{ font-size:9.5px; font-weight:bold;font-style:italic}
|
||||
p { text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
|
||||
|
||||
'''
|
@ -1,6 +1,6 @@
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
blic.rs
|
||||
'''
|
||||
@ -73,7 +73,10 @@ class Blic(BasicNewsRecipe):
|
||||
def print_version(self, url):
|
||||
return url + '/print'
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup('http://www.blic.rs/')
|
||||
alink = soup.find('a', attrs={'id':'blic_naslovna_print'})
|
||||
if alink:
|
||||
return 'http://www.blic.rs' + alink['href']
|
||||
return None
|
||||
|
@ -7,6 +7,7 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
description = 'Fashion, beauty and Gossip for women from COSMOPOLITAN -UK'
|
||||
|
||||
__author__ = 'Dave Asbury'
|
||||
#last update 21/12/11
|
||||
# greyscale code by Starson
|
||||
cover_url = 'http://www.cosmopolitan.magazine.co.uk/files/4613/2085/8988/Cosmo_Cover3.jpg'
|
||||
no_stylesheets = True
|
||||
@ -31,8 +32,9 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
dict(name='div',attrs={'class' : ['blogInfo','viral_toolbar','comment_number','prevEntry nav']}),
|
||||
dict(name='div',attrs={'class' : 'blog_module_about_the_authors'}),
|
||||
dict(attrs={'id': ['breadcrumbs','comment','related_links_list','right_rail','content_sec_fb_more','content_sec_mostpopularstories','content-sec_fb_frame_viewfb_bot']}),
|
||||
dict(attrs={'class' : ['read_liked_that_header','fb_back_next_area']})
|
||||
]
|
||||
dict(attrs={'class' : ['read_liked_that_header','fb_back_next_area']}),
|
||||
dict(name='li',attrs={'class' : 'thumb'})
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Love & Sex', u'http://www.cosmopolitan.co.uk/love-sex/rss/'), (u'Men', u'http://cosmopolitan.co.uk/men/rss/'), (u'Fashion', u'http://cosmopolitan.co.uk/fashion/rss/'), (u'Hair & Beauty', u'http://cosmopolitan.co.uk/beauty-hair/rss/'), (u'LifeStyle', u'http://cosmopolitan.co.uk/lifestyle/rss/'), (u'Cosmo On Campus', u'http://cosmopolitan.co.uk/campus/rss/'), (u'Celebrity Gossip', u'http://cosmopolitan.co.uk/celebrity-gossip/rss/')]
|
||||
@ -48,4 +50,3 @@ class AdvancedUserRecipe1306097511(BasicNewsRecipe):
|
||||
img.type = "GrayscaleType"
|
||||
img.save(iurl)
|
||||
return soup
|
||||
|
||||
|
@ -5,7 +5,7 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
|
||||
description = 'News as provide by The Daily Mirror -UK'
|
||||
|
||||
__author__ = 'Dave Asbury'
|
||||
# last updated 30/10/11
|
||||
# last updated 26/12/11
|
||||
language = 'en_GB'
|
||||
|
||||
cover_url = 'http://yookeo.com/screens/m/i/mirror.co.uk.jpg'
|
||||
@ -13,30 +13,22 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
|
||||
masthead_url = 'http://www.nmauk.co.uk/nma/images/daily_mirror.gif'
|
||||
|
||||
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 30
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 20
|
||||
remove_empty_feeds = True
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
extra_css = '''
|
||||
body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
|
||||
'''
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div',attrs={'id' : 'body-content'})
|
||||
]
|
||||
|
||||
remove_tags_after = [dict (name='div',attrs={'class' : 'related'})]
|
||||
|
||||
auto_cleanup = True
|
||||
remove_tags = [
|
||||
dict(name='div',attrs={'id' : ['sidebar','menu','search-box','roffers-top']}),
|
||||
dict(name='div',attrs={'class' :['inline-ad span-16 last','article-resize','related','list teasers']}),
|
||||
dict(attrs={'class' : ['channellink','article-tags','replace','append-html']}),
|
||||
dict(name='div',attrs={'class' : 'span-12 last sl-others addthis_toolbox addthis_default_style'})
|
||||
dict(name='title'),
|
||||
dict(name='div',attrs={'class' : ['inline-ad span-16 last','caption']}),
|
||||
]
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'<dl class="q-search">.*?</dl>', re.IGNORECASE | re.DOTALL), lambda match: '')]
|
||||
(re.compile(r'- mirror.co.uk', re.IGNORECASE | re.DOTALL), lambda match: '')]
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'Advertisement >>', re.IGNORECASE | re.DOTALL), lambda match: '')]
|
||||
|
||||
|
||||
feeds = [
|
||||
@ -53,5 +45,10 @@ class AdvancedUserRecipe1306061239(BasicNewsRecipe):
|
||||
,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml')
|
||||
|
||||
# example of commented out feed not needed ,(u'Travel','http://www.mirror.co.uk/advice/travel/rss.xml')
|
||||
|
||||
]
|
||||
extra_css = '''
|
||||
body{ text-align: justify; font-family:Arial,Helvetica,sans-serif; font-size:11px; font-size-adjust:none; font-stretch:normal; font-style:normal; font-variant:normal; font-weight:normal;}
|
||||
h1{ font-size:18px;}
|
||||
img { display:block}
|
||||
'''
|
||||
|
||||
|
11
recipes/derin_dusunce.recipe
Normal file
11
recipes/derin_dusunce.recipe
Normal file
@ -0,0 +1,11 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class BasicUserRecipe1324913694(BasicNewsRecipe):
|
||||
title = u'Derin Dusunce'
|
||||
language = 'tr'
|
||||
__author__ = 'asalet_r'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 20
|
||||
auto_cleanup = True
|
||||
|
||||
feeds = [(u'Derin D\xfc\u015f\xfcnce', u'http://www.derindusunce.org/feed/')]
|
12
recipes/dunya_bizim.recipe
Normal file
12
recipes/dunya_bizim.recipe
Normal file
@ -0,0 +1,12 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class BasicUserRecipe1324736687(BasicNewsRecipe):
|
||||
title = u'D\xfcnya Bizim'
|
||||
language = 'tr'
|
||||
__author__ = 'asalet_r'
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 10
|
||||
auto_cleanup = True
|
||||
|
||||
feeds = [(u'Aktif \u0130mamlar', u'http://dunyabizim.com/servisler/rss.php?kategoriID=31'), (u'Ayr\u0131nt\u0131 Defteri', u'http://dunyabizim.com/servisler/rss.php?kategoriID=58'), (u'Baba Kitaplar', u'http://dunyabizim.com/servisler/rss.php?kategoriID=4'), (u'Bu da Oldu', u'http://dunyabizim.com/servisler/rss.php?kategoriID=32'), (u'\xc7-al\u0131nt\u0131 Yaz\u0131lar', u'http://dunyabizim.com/servisler/rss.php?kategoriID=33'), (u'Dar\xfclmedya', u'http://dunyabizim.com/servisler/rss.php?kategoriID=49'), (u'Gidenler', u'http://dunyabizim.com/servisler/rss.php?kategoriID=59'), (u'G\xfczel Mekanlar', u'http://dunyabizim.com/servisler/rss.php?kategoriID=43'), (u'\u0130yi Haberler', u'http://dunyabizim.com/servisler/rss.php?kategoriID=18'), (u'\u0130yi M\xfczikler', u'http://dunyabizim.com/servisler/rss.php?kategoriID=2'), (u'Kalite Dergiler', u'http://dunyabizim.com/servisler/rss.php?kategoriID=3'), (u'Konu\u015fa Konu\u015fa', u'http://dunyabizim.com/servisler/rss.php?kategoriID=24'), (u'M\xfcstesta G\xfczeller', u'http://dunyabizim.com/servisler/rss.php?kategoriID=65'), (u'O \u015eimdi Nerede?', u'http://dunyabizim.com/servisler/rss.php?kategoriID=52'), (u'Olsa Ke\u015fke', u'http://dunyabizim.com/servisler/rss.php?kategoriID=34'), (u'Orada Ne Oldu?', u'http://dunyabizim.com/servisler/rss.php?kategoriID=38'), (u'\xd6nemli Adamlar', u'http://dunyabizim.com/servisler/rss.php?kategoriID=1'), (u'Polemik', u'http://dunyabizim.com/servisler/rss.php?kategoriID=39'), (u'Sinema', u'http://dunyabizim.com/servisler/rss.php?kategoriID=23'), (u'Yalan Haber', u'http://dunyabizim.com/servisler/rss.php?kategoriID=40'), (u'Yeni \u015eeyler', u'http://dunyabizim.com/servisler/rss.php?kategoriID=57'), (u'Zekeriya Sofras\u0131', u'http://dunyabizim.com/servisler/rss.php?kategoriID=60')]
|
12
recipes/dunya_bulteni.recipe
Normal file
12
recipes/dunya_bulteni.recipe
Normal file
@ -0,0 +1,12 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class BasicUserRecipe1321194347(BasicNewsRecipe):
|
||||
title = u'D\xfcnya B\xfclteni'
|
||||
language = 'tr'
|
||||
__author__ = 'asalet_r'
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 50
|
||||
auto_cleanup = True
|
||||
|
||||
feeds = [(u'Tarih Dosyas\u0131', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=157'), (u'R\xf6portaj', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=153'), (u'Makale-Yorum', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=174'), (u'K\xfclt\xfcr-Sanat', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=66'), (u'Hayat\u0131n \u0130\xe7inden', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=200'), (u'Haber Analiz', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=123'), (u'Gezi-\u0130zlenim', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=90'), (u'Aile Sa\u011fl\u0131k E\u011fitim', u'http://www.dunyabulteni.net/servisler/rss.php?kategoriID=75')]
|
46
recipes/echo_online.recipe
Normal file
46
recipes/echo_online.recipe
Normal file
@ -0,0 +1,46 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Kovid Goyal <kovid at kovidgoyal.net>, Armin Geller'
|
||||
'''
|
||||
Fetch echo-online.de
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
class Echo_Online(BasicNewsRecipe):
|
||||
title = u'Echo Online' # 2011-12-28 AGe
|
||||
description = '-Echo Online-'
|
||||
publisher = 'Echo Online GmbH'
|
||||
category = 'News, Germany'
|
||||
__author__ = 'Armin Geller' # 2011-12-28 AGe
|
||||
language = 'de'
|
||||
lang = 'de-DE'
|
||||
encoding = 'iso-8859-1'
|
||||
timefmt = ' [%a, %d %b %Y]'
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 50 # 2011-12-28 AGe
|
||||
no_stylesheets = True
|
||||
auto_cleanup = True
|
||||
remove_javascript = True
|
||||
|
||||
feeds = [
|
||||
(u'Topnews', u'http://www.echo-online.de/storage/rss/rss/topnews.xml'),
|
||||
(u'Darmstadt', u'http://www.echo-online.de/rss/darmstadt.xml'),
|
||||
(u'Darmstadt-Dieburg', u'http://www.echo-online.de/rss/darmstadtdieburg.xml'),
|
||||
(u'Kreis Gro\xdf-Gerau', u'http://www.echo-online.de/rss/kreisgrossgerau.xml'),
|
||||
(u'R\xfcsselsheim', u'http://www.echo-online.de/rss/ruesselsheim.xml'),
|
||||
(u'Kreis Bergstra\xdfe', u'http://www.echo-online.de/rss/bergstrasse.xml'),
|
||||
(u'Odenwaldkreis', u'http://www.echo-online.de/rss/odenwald.xml'),
|
||||
(u'SV 98', u'http://www.echo-online.de/rss/sv98.xml'),
|
||||
(u'Kino', u'http://www.echo-online.de/rss/kino.xml'),
|
||||
(u'Ausstellungen', u'http://www.echo-online.de/rss/ausstellungen.xml'),
|
||||
(u'Ausflug & Reise', u'http://www.echo-online.de/rss/ausflugreise.xml'),
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
return self.browser.open_novisit(url).geturl() + '?_FRAME=33&_FORMAT=PRINT'
|
||||
|
||||
remove_tags = [dict(name='div', attrs={'class':["header", "name"]}),]
|
||||
auto_cleanup_keep = '//div[@class="bild_gross w270"]'
|
||||
|
||||
cover_url = 'http://adcounter.darmstaedter-echo.de/webdav/files/config/gui/images/Zeitungsfaecher.gif'
|
||||
|
50
recipes/edge_conversations.recipe
Normal file
50
recipes/edge_conversations.recipe
Normal file
@ -0,0 +1,50 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012 Levien van Zon <levien@zonnetjes.net>'
|
||||
|
||||
'''
|
||||
Fetch Edge.org conversations
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class EdgeConversationRSS(BasicNewsRecipe):
|
||||
title = u'Edge.org Conversations'
|
||||
__author__ = 'levien'
|
||||
language = 'en'
|
||||
description = '''Edge.org offers "open-minded, free ranging, intellectually
|
||||
playful ... an unadorned pleasure in curiosity, a collective expression of
|
||||
wonder at the living and inanimate world ... an ongoing and thrilling
|
||||
colloquium.'''
|
||||
oldest_article = 60
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'HomeLeftPannel IMGCTRL'}) ]
|
||||
remove_tags = [
|
||||
dict(name='div',attrs={'class':'Logo'})
|
||||
]
|
||||
|
||||
feeds = [(u'Edge RSS', u'http://edge.org/feeds/')]
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace('conversation/', 'conversation.php?cid=')
|
||||
|
||||
def parse_feeds(self):
|
||||
|
||||
# Call parent's method.
|
||||
feeds = BasicNewsRecipe.parse_feeds(self)
|
||||
|
||||
# Loop through all feeds.
|
||||
for feed in feeds:
|
||||
|
||||
# Loop through all articles in feed.
|
||||
for article in feed.articles[:]:
|
||||
|
||||
# Remove anything that is not a conversation, and remove PDF files as well...
|
||||
|
||||
if not ('CONVERSATION' in article.title):
|
||||
feed.articles.remove(article)
|
||||
elif 'pdf' in article.url:
|
||||
feed.articles.remove(article)
|
||||
|
||||
return feeds
|
||||
|
48
recipes/elet_es_irodalom.recipe
Normal file
48
recipes/elet_es_irodalom.recipe
Normal file
@ -0,0 +1,48 @@
|
||||
################################################################################
|
||||
#Description: http://es.hu/ RSS channel
|
||||
#Author: Bigpapa (bigpapabig@hotmail.com)
|
||||
#Date: 2010.12.01. - V1.0
|
||||
################################################################################
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class elet_es_irodalom(BasicNewsRecipe):
|
||||
title = u'Elet es Irodalom'
|
||||
__author__ = 'Bigpapa'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 20 # Az adott e-bookban tarolt cikkek feedenkenti maximalis szamat adja meg.
|
||||
no_stylesheets = True
|
||||
#delay = 1
|
||||
use_embedded_content = False
|
||||
encoding = 'iso-8859-2'
|
||||
category = 'Cikkek'
|
||||
language = 'hu'
|
||||
publication_type = 'newsportal'
|
||||
extra_css = '.doc_title { font: bold 30px } .doc_author {font: bold 14px} '
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':['doc_author', 'doc_title', 'doc']})
|
||||
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='a', attrs={'target':['_TOP']}),
|
||||
dict(name='div', attrs={'style':['float: right; margin-left: 5px; margin-bottom: 5px;', 'float: right; margin-left: 5px; margin-bottom: 5px;']}),
|
||||
|
||||
|
||||
]
|
||||
|
||||
|
||||
|
||||
feeds = [
|
||||
(u'Publicisztika', 'http://www.feed43.com/4684235031168504.xml'),
|
||||
(u'Interj\xfa', 'http://www.feed43.com/4032465460040618.xml'),
|
||||
(u'Visszhang', 'http://www.feed43.com/3727375706873086.xml'),
|
||||
(u'P\xe1ratlan oldal', 'http://www.feed43.com/2525784782475057.xml'),
|
||||
(u'Feuilleton', 'http://www.feed43.com/7216025082703073.xml'),
|
||||
(u'Pr\xf3za', 'http://www.feed43.com/8760248802326384.xml'),
|
||||
(u'Vers', 'http://www.feed43.com/1737324675134275.xml'),
|
||||
(u'K\xf6nyvkritika', 'http://www.feed43.com/1281156550717082.xml'),
|
||||
(u'M\u0171b\xedr\xe1lat', 'http://www.feed43.com/1851854623681044.xml')
|
||||
|
||||
]
|
@ -20,7 +20,7 @@ class ESPN(BasicNewsRecipe):
|
||||
|
||||
use_embedded_content = False
|
||||
remove_javascript = True
|
||||
needs_subscription = True
|
||||
needs_subscription = 'optional'
|
||||
encoding= 'ISO-8859-1'
|
||||
|
||||
remove_tags_before = dict(name='font', attrs={'class':'date'})
|
||||
@ -75,32 +75,30 @@ class ESPN(BasicNewsRecipe):
|
||||
|
||||
return soup
|
||||
|
||||
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
br.set_handle_refresh(False)
|
||||
url = ('https://r.espn.go.com/members/v3_1/login')
|
||||
raw = br.open(url).read()
|
||||
raw = re.sub(r'(?s)<form>.*?id="regsigninbtn".*?</form>', '', raw)
|
||||
with TemporaryFile(suffix='.htm') as fname:
|
||||
with open(fname, 'wb') as f:
|
||||
f.write(raw)
|
||||
br.open_local_file(fname)
|
||||
if self.username and self.password:
|
||||
br.set_handle_refresh(False)
|
||||
url = ('https://r.espn.go.com/members/v3_1/login')
|
||||
raw = br.open(url).read()
|
||||
raw = re.sub(r'(?s)<form>.*?id="regsigninbtn".*?</form>', '', raw)
|
||||
with TemporaryFile(suffix='.htm') as fname:
|
||||
with open(fname, 'wb') as f:
|
||||
f.write(raw)
|
||||
br.open_local_file(fname)
|
||||
|
||||
br.form = br.forms().next()
|
||||
br.form.find_control(name='username', type='text').value = self.username
|
||||
br.form['password'] = self.password
|
||||
br.submit().read()
|
||||
br.open('http://espn.go.com').read()
|
||||
br.set_handle_refresh(True)
|
||||
br.form = br.forms().next()
|
||||
br.form.find_control(name='username', type='text').value = self.username
|
||||
br.form['password'] = self.password
|
||||
br.submit().read()
|
||||
br.open('http://espn.go.com').read()
|
||||
br.set_handle_refresh(True)
|
||||
return br
|
||||
|
||||
def get_article_url(self, article):
|
||||
return article.get('guid', None)
|
||||
|
||||
def print_version(self, url):
|
||||
|
||||
if 'eticket' in url:
|
||||
return url.partition('&')[0].replace('story?', 'print?')
|
||||
match = re.search(r'story\?(id=\d+)', url)
|
||||
|
30
recipes/fhm_uk.recipe
Normal file
30
recipes/fhm_uk.recipe
Normal file
@ -0,0 +1,30 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
||||
title = u'FHM UK'
|
||||
description = 'Good News for Men'
|
||||
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/373529_38324934806_64930243_n.jpg'
|
||||
masthead_url = 'http://www.fhm.com/App_Resources/Images/Site/re-design/logo.gif'
|
||||
__author__ = 'Dave Asbury'
|
||||
# last updated 27/12/11
|
||||
language = 'en_GB'
|
||||
oldest_article = 28
|
||||
max_articles_per_feed = 12
|
||||
remove_empty_feeds = True
|
||||
no_stylesheets = True
|
||||
#auto_cleanup = True
|
||||
#articles_are_obfuscated = True
|
||||
keep_only_tags = [
|
||||
dict(name='h1'),
|
||||
dict(name='img',attrs={'id' : 'ctl00_Body_imgMainImage'}),
|
||||
dict(name='div',attrs={'id' : ['articleLeft']}),
|
||||
dict(name='div',attrs={'class' : ['imagesCenterArticle','containerCenterArticle','articleBody']}),
|
||||
|
||||
]
|
||||
|
||||
|
||||
feeds = [
|
||||
(u'From the Homepage',u'http://feed43.com/8053226782885416.xml'),
|
||||
(u'The Final Countdown', u'http://feed43.com/3576106158530118.xml'),
|
||||
(u'Gaming',u'http://feed43.com/0755006465351035.xml'),
|
||||
]
|
@ -1,4 +1,3 @@
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class GlasgowHerald(BasicNewsRecipe):
|
||||
@ -9,12 +8,16 @@ class GlasgowHerald(BasicNewsRecipe):
|
||||
language = 'en_GB'
|
||||
|
||||
__author__ = 'Kovid Goyal'
|
||||
use_embedded_content = False
|
||||
|
||||
keep_only_tags = [dict(attrs={'class':'article'})]
|
||||
remove_tags = [
|
||||
dict(id=['pic-nav']),
|
||||
dict(attrs={'class':['comments-top']})
|
||||
]
|
||||
no_stylesheets = True
|
||||
auto_cleanup = True
|
||||
|
||||
#keep_only_tags = [dict(attrs={'class':'article'})]
|
||||
#remove_tags = [
|
||||
#dict(id=['pic-nav']),
|
||||
#dict(attrs={'class':['comments-top']})
|
||||
#]
|
||||
|
||||
|
||||
feeds = [
|
||||
@ -26,4 +29,3 @@ class GlasgowHerald(BasicNewsRecipe):
|
||||
u'http://www.heraldscotland.com/cmlink/1.768',),
|
||||
(u'Columnists', u'http://www.heraldscotland.com/cmlink/1.658574')]
|
||||
|
||||
|
||||
|
13
recipes/goal.recipe
Normal file
13
recipes/goal.recipe
Normal file
@ -0,0 +1,13 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1325677767(BasicNewsRecipe):
|
||||
title = u'Goal'
|
||||
oldest_article = 1
|
||||
language = 'it'
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
remove_tags_after = [dict(id='article_content')]
|
||||
feeds = [(u'Goal', u'http://www.goal.com/it/feeds/news?fmt=rss')]
|
||||
__author__ = 'faber1971'
|
||||
description = 'Sports news from Italy'
|
||||
|
106
recipes/grantland.recipe
Normal file
106
recipes/grantland.recipe
Normal file
@ -0,0 +1,106 @@
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class GrantLand(BasicNewsRecipe):
|
||||
title = u"Grantland"
|
||||
description = 'Writings on Sports & Pop Culture'
|
||||
language = 'en'
|
||||
__author__ = 'barty on mobileread.com forum'
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = False
|
||||
# auto_cleanup is too aggressive sometimes and we end up with blank articles
|
||||
auto_cleanup = False
|
||||
timefmt = ' [%a, %d %b %Y]'
|
||||
oldest_article = 365
|
||||
|
||||
cover_url = 'http://cdn0.sbnation.com/imported_assets/740965/blog_grantland_grid_3.jpg'
|
||||
masthead_url = 'http://a1.espncdn.com/prod/assets/grantland/grantland-logo.jpg'
|
||||
|
||||
INDEX = 'http://www.grantland.com'
|
||||
CATEGORIES = [
|
||||
# comment out categories you don't want
|
||||
# (user friendly name, url suffix, max number of articles to load)
|
||||
('Today in Grantland','',20),
|
||||
('In Case You Missed It','incaseyoumissedit',35),
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
{'name':['head','style','script']},
|
||||
{'id':['header']},
|
||||
{'class':re.compile(r'\bside|\bad\b|floatright|tags')}
|
||||
]
|
||||
remove_tags_before = {'class':'wrapper'}
|
||||
remove_tags_after = [{'id':'content'}]
|
||||
|
||||
preprocess_regexps = [
|
||||
# <header> tags with an img inside are just blog banners, don't need them
|
||||
# note: there are other useful <header> tags so we don't want to just strip all of them
|
||||
(re.compile(r'<header class.+?<img .+?>.+?</header>', re.DOTALL|re.IGNORECASE),lambda m: ''),
|
||||
# delete everything between the *last* <hr class="small" /> and </article>
|
||||
(re.compile(r'<hr class="small"(?:(?!<hr class="small").)+</article>', re.DOTALL|re.IGNORECASE),lambda m: '<hr class="small" /></article>'),
|
||||
]
|
||||
extra_css = """cite, time { font-size: 0.8em !important; margin-right: 1em !important; }
|
||||
img + cite { display:block; text-align:right}"""
|
||||
|
||||
def parse_index(self):
|
||||
feeds = []
|
||||
seen_urls = set([])
|
||||
|
||||
for category in self.CATEGORIES:
|
||||
|
||||
(cat_name, tag, max_articles) = category
|
||||
self.log('Reading category:', cat_name)
|
||||
articles = []
|
||||
|
||||
page = "%s/%s" % (self.INDEX, tag)
|
||||
soup = self.index_to_soup(page)
|
||||
headers = soup.findAll('h2' if tag=='' else 'h3')
|
||||
|
||||
for header in headers:
|
||||
tag = header.find('a',href=True)
|
||||
if tag is None:
|
||||
continue
|
||||
url = tag['href']
|
||||
if url in seen_urls:
|
||||
continue
|
||||
title = self.tag_to_string(tag)
|
||||
if 'Podcast:' in title or 'In Case You Missed It' in title:
|
||||
continue
|
||||
desc = dt = ''
|
||||
# get at the div that contains description and other info
|
||||
div = header.parent.find('div')
|
||||
if div is not None:
|
||||
desc = self.tag_to_string(div)
|
||||
dt = div.find('time')
|
||||
if dt is not None:
|
||||
dt = self.tag_to_string( dt)
|
||||
|
||||
# if div contains the same url that is in h2/h3
|
||||
# that means this is a series split into multiple articles
|
||||
if div.find('a',href=url):
|
||||
self.log('\tFound series:', title)
|
||||
# grab all articles in series
|
||||
for tag in div.findAll('a',href=True):
|
||||
url = tag['href']
|
||||
if url in seen_urls:
|
||||
continue
|
||||
self.log('\t', url)
|
||||
seen_urls.add(url)
|
||||
articles.append({'title':title+' - '+self.tag_to_string( tag),
|
||||
'url':url,'description':desc,'date':dt})
|
||||
else:
|
||||
self.log('\tFound article:', title)
|
||||
self.log('\t', url)
|
||||
seen_urls.add(url)
|
||||
articles.append({'title':title,'url':url,'description':desc,'date':dt})
|
||||
|
||||
if len(articles) >= max_articles:
|
||||
break
|
||||
|
||||
if articles:
|
||||
feeds.append((cat_name, articles))
|
||||
|
||||
return feeds
|
||||
|
||||
def print_version(self, url):
|
||||
return url+'?view=print'
|
11
recipes/haksoz.recipe
Normal file
11
recipes/haksoz.recipe
Normal file
@ -0,0 +1,11 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class BasicUserRecipe1324739199(BasicNewsRecipe):
|
||||
title = u'Haks\xf6z'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 20
|
||||
auto_cleanup = True
|
||||
language = 'tr'
|
||||
__author__ = 'asalet_r'
|
||||
|
||||
feeds = [(u'Haks\xf6z', u'http://www.haksozhaber.net/rss/')]
|
58
recipes/hamilton_spectator.recipe
Normal file
58
recipes/hamilton_spectator.recipe
Normal file
@ -0,0 +1,58 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
'''
|
||||
Hamilton Spectator Calibre Recipe
|
||||
'''
|
||||
class HamiltonSpectator(BasicNewsRecipe):
|
||||
title = u'Hamilton Spectator'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
__author__ = u'Eric Coolman'
|
||||
publisher = u'thespec.com'
|
||||
description = u'Ontario Canada Newspaper'
|
||||
category = u'News, Ontario, Canada'
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
language = 'en_CA'
|
||||
encoding = 'utf-8'
|
||||
|
||||
feeds = [
|
||||
(u'Top Stories',u'http://www.thespec.com/rss?query=/&assetType=Article'),
|
||||
(u'All News',u'http://www.thespec.com/rss?query=/news&assetType=Article'),
|
||||
(u'Local',u'http://www.thespec.com/rss?query=/local&assetType=Article'),
|
||||
(u'Ontario',u'http://www.thespec.com/rss?query=/ontario&assetType=Article'),
|
||||
(u'Canada',u'http://www.thespec.com/rss?query=/canada&assetType=Article'),
|
||||
(u'World News',u'http://www.thespec.com/rss?query=/world&assetType=Article'),
|
||||
(u'Business',u'http://www.thespec.com/rss?query=/business&assetType=Article'),
|
||||
(u'Crime',u'http://www.thespec.com/rss?query=/crime&assetType=Article'),
|
||||
(u'All Sports',u'http://www.thespec.com/rss?query=/sports&assetType=Article'),
|
||||
(u'Ticats',u'http://www.thespec.com/rss?query=/sports/ticats&assetType=Article'),
|
||||
(u'Bulldogs',u'http://www.thespec.com/rss?query=/sports/bulldogs&assetType=Article'),
|
||||
(u'High School Sports',u'http://www.thespec.com/rss?query=/sports/highschools&assetType=Article'),
|
||||
(u'Local Sports',u'http://www.thespec.com/rss?query=/sports/local&assetType=Article'),
|
||||
(u'What''s On',u'http://www.thespec.com/rss?query=/whatson&assetType=Article'),
|
||||
(u'Arts and Entertainment',u'http://www.thespec.com/rss?query=/whatson/artsentertainment&assetType=Article'),
|
||||
(u'Books',u'http://www.thespec.com/rss?query=/whatson/books&assetType=Article'),
|
||||
(u'Movies',u'http://www.thespec.com/rss?query=/whatson/movies&assetType=Article'),
|
||||
(u'Music',u'http://www.thespec.com/rss?query=/whatson/music&assetType=Article'),
|
||||
(u'Restaurant Reviews',u'http://www.thespec.com/rss?query=/whatson/restaurants&assetType=Article'),
|
||||
(u'Opinion',u'http://www.thespec.com/rss?query=/opinion&assetType=Article'),
|
||||
(u'Opinion Columns',u'http://www.thespec.com/rss?query=/opinion/columns&assetType=Article'),
|
||||
(u'Cartoons',u'http://www.thespec.com/rss?query=/opinion/cartoons&assetType=Article'),
|
||||
(u'Letters',u'http://www.thespec.com/rss?query=/opinion/letters&assetType=Article'),
|
||||
(u'Editorial',u'http://www.thespec.com/rss?query=/opinion/editorial&assetType=Article'),
|
||||
(u'Community',u'http://www.thespec.com/rss?query=/community&assetType=Article'),
|
||||
(u'Education',u'http://www.thespec.com/rss?query=/community/education&assetType=Article'),
|
||||
(u'Faith',u'http://www.thespec.com/rss?query=/community/faith&assetType=Article'),
|
||||
(u'Contests',u'http://www.thespec.com/rss?query=/community/contests&assetType=Article'),
|
||||
(u'Living',u'http://www.thespec.com/rss?query=/living&assetType=Article'),
|
||||
(u'Food',u'http://www.thespec.com/rss?query=/living/food&assetType=Article'),
|
||||
(u'Health and Fitness',u'http://www.thespec.com/rss?query=/living/healthfitness&assetType=Article'),
|
||||
(u'Your Home',u'http://www.thespec.com/rss?query=/living/home&assetType=Article'),
|
||||
(u'Travel',u'http://www.thespec.com/rss?query=/living/travel&assetType=Article'),
|
||||
(u'Family and Parenting',u'http://www.thespec.com/rss?query=/living/familyparenting&assetType=Article'),
|
||||
(u'Style',u'http://www.thespec.com/rss?query=/living/style&assetType=Article')
|
||||
]
|
||||
|
@ -1,4 +1,5 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
import urllib, re
|
||||
|
||||
class HindustanTimes(BasicNewsRecipe):
|
||||
title = u'Hindustan Times'
|
||||
@ -26,4 +27,24 @@ class HindustanTimes(BasicNewsRecipe):
|
||||
'http://feeds.hindustantimes.com/HT-Homepage-LifestyleNews'),
|
||||
]
|
||||
|
||||
def get_article_url(self, article):
|
||||
'''
|
||||
HT uses a variant of the feedportal RSS ad display mechanism
|
||||
'''
|
||||
try:
|
||||
s = article.summary
|
||||
return urllib.unquote(
|
||||
re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
|
||||
except:
|
||||
pass
|
||||
url = BasicNewsRecipe.get_article_url(self, article)
|
||||
res = self.browser.open_novisit(url)
|
||||
url = res.geturl().split('/')[-2]
|
||||
encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
|
||||
'0D': '?', '0E': '-', '0N': '.com', '0L': 'http://', '0S':
|
||||
'www.'}
|
||||
for k, v in encoding.iteritems():
|
||||
url = url.replace(k, v)
|
||||
return url
|
||||
|
||||
|
||||
|
@ -1,44 +1,58 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
################################################################################
|
||||
#Description: http://hvg.hu/ RSS channel
|
||||
#Author: Bigpapa (bigpapabig@hotmail.com)
|
||||
#Date: 2011.12.20. - V1.1
|
||||
################################################################################
|
||||
|
||||
class HVG(BasicNewsRecipe):
|
||||
title = 'HVG.HU'
|
||||
__author__ = u'István Papp'
|
||||
description = u'Friss hírek a HVG-től'
|
||||
timefmt = ' [%Y. %b. %d., %a.]'
|
||||
oldest_article = 4
|
||||
language = 'hu'
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'utf8'
|
||||
publisher = 'HVG Online'
|
||||
category = u'news, hírek, hvg'
|
||||
extra_css = 'body{ font-family: Verdana,Helvetica,Arial,sans-serif } .introduction{font-weight: bold} .story-feature{display: block; padding: 0; border: 1px solid; width: 40%; font-size: small} .story-feature h2{text-align: center; text-transform: uppercase} '
|
||||
preprocess_regexps = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
|
||||
remove_tags_before = dict(id='pg-content')
|
||||
remove_javascript = True
|
||||
remove_empty_feeds = True
|
||||
class hvg(BasicNewsRecipe):
|
||||
title = u'HVG'
|
||||
__author__ = 'Bigpapa'
|
||||
language = 'hu'
|
||||
oldest_article = 5 # Hany napos legyen a legregebbi cikk amit leszedjen.
|
||||
max_articles_per_feed = 5 # Az adott e-bookban tarolt cikkek feedenkenti maximalis szamat adja meg.
|
||||
no_stylesheets = True
|
||||
encoding = 'utf8'
|
||||
extra_css = ' h2 { font:bold 28px} '
|
||||
|
||||
feeds = [
|
||||
(u'Itthon', u'http://hvg.hu/rss/itthon')
|
||||
,(u'Világ', u'http://hvg.hu/rss/vilag')
|
||||
,(u'Gazdaság', u'http://hvg.hu/rss/gazdasag')
|
||||
,(u'IT | Tudomány', u'http://hvg.hu/rss/tudomany')
|
||||
,(u'Panoráma', u'http://hvg.hu/rss/Panorama')
|
||||
,(u'Karrier', u'http://hvg.hu/rss/karrier')
|
||||
,(u'Gasztronómia', u'http://hvg.hu/rss/gasztronomia')
|
||||
,(u'Helyi érték', u'http://hvg.hu/rss/helyiertek')
|
||||
,(u'Kultúra', u'http://hvg.hu/rss/kultura')
|
||||
,(u'Cégautó', u'http://hvg.hu/rss/cegauto')
|
||||
,(u'Vállalkozó szellem', u'http://hvg.hu/rss/kkv')
|
||||
,(u'Egészség', u'http://hvg.hu/rss/egeszseg')
|
||||
,(u'Vélemény', u'http://hvg.hu/rss/velemeny')
|
||||
,(u'Sport', u'http://hvg.hu/rss/sport')
|
||||
]
|
||||
remove_attributes = ['style','font', 'href']
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace ('#rss', '/print')
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'id':['pg-content']})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':['box articlemenu', 'bannergoogle468', 'boxcontainer left', 'boxcontainer', 'commentbox']}),
|
||||
dict(name='table', attrs={'class':['banner2', 'monocle']}),
|
||||
dict(name='div', attrs={'id':['connect_widget_4cf63ca849ddf4577922632', 'sharetip', 'upprev_box']}),
|
||||
dict(name='div', attrs={'style':['float: right; margin-bottom: 5px;', 'display: none;']}),
|
||||
dict(name='h3', attrs={'class':['hthree']}),
|
||||
dict(name='ul', attrs={'class':['defaultul']}),
|
||||
dict(name='form', attrs={'id':['commentForm']}),
|
||||
dict(name='h6', attrs={'class':['hthree']}),
|
||||
dict(name='h6', attrs={'class':['more2']}),
|
||||
dict(name='img', attrs={'class':['framed']}),
|
||||
dict(name='td', attrs={'class':['greyboxbody','embedvideobody','embedvideofooter','embedvideobottom']}),
|
||||
|
||||
|
||||
|
||||
]
|
||||
|
||||
feeds = [
|
||||
# (u'\xd6sszes', 'http://hvg.hu/rss'),
|
||||
(u'Itthon', 'http://hvg.hu/rss/itthon'),
|
||||
(u'Vil\xe1g', 'http://hvg.hu/rss/vilag'),
|
||||
(u'Gazdas\xe1g', 'http://hvg.hu/rss/gazdasag'),
|
||||
(u'Tudom\xe1ny', 'http://hvg.hu/rss/tudomany'),
|
||||
(u'Panor\xe1ma', 'http://hvg.hu/rss/panorama'),
|
||||
(u'Karrier', 'http://hvg.hu/rss/karrier'),
|
||||
(u'Gasztron\xf3mia', 'http://hvg.hu/rss/gasztronomia'),
|
||||
(u'Helyi \xe9rt\xe9k', 'http://hvg.hu/rss/helyiertek'),
|
||||
(u'Kult\xfara', 'http://hvg.hu/rss/kultura'),
|
||||
(u'C\xe9gaut\xf3', 'http://hvg.hu/rss/cegauto'),
|
||||
(u'V\xe1llalkoz\xf3 szellem', 'http://hvg.hu/rss/kkv'),
|
||||
(u'Eg\xe9szs\xe9g', 'http://hvg.hu/rss/egeszseg'),
|
||||
(u'V\xe9lem\xe9ny', 'http://hvg.hu/rss/velemeny'),
|
||||
(u'Sport', 'http://hvg.hu/rss/sport')
|
||||
]
|
BIN
recipes/icons/mlody_technik_pl.png
Normal file
BIN
recipes/icons/mlody_technik_pl.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 2.1 KiB |
Binary file not shown.
Before Width: | Height: | Size: 15 KiB |
BIN
recipes/icons/moneynews.png
Normal file
BIN
recipes/icons/moneynews.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 914 B |
BIN
recipes/icons/novilist_novine_hr.png
Normal file
BIN
recipes/icons/novilist_novine_hr.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 241 B |
BIN
recipes/icons/novilist_portal_hr.png
Normal file
BIN
recipes/icons/novilist_portal_hr.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 944 B |
BIN
recipes/icons/rionegro.png
Normal file
BIN
recipes/icons/rionegro.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 817 B |
68
recipes/ideal_almeria.recipe
Normal file
68
recipes/ideal_almeria.recipe
Normal file
@ -0,0 +1,68 @@
|
||||
# encoding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'Josemi Liébana <office at josemi-liebana.com>'
|
||||
__copyright__ = 'Josemi Liébana'
|
||||
__version__ = 'v0.1'
|
||||
__date__ = '5 January 2012'
|
||||
|
||||
|
||||
'''
|
||||
www.ideal.es
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Ideal(BasicNewsRecipe):
|
||||
title = u'Ideal (Edición Almería)'
|
||||
__author__ = u'Josemi Liébana'
|
||||
description = u'Noticias de Almería y el resto del mundo'
|
||||
publisher = 'Ideal'
|
||||
category = u'News, Politics, Spain, Almería'
|
||||
publication_type = 'Newspaper'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 200
|
||||
no_stylesheets = True
|
||||
encoding = 'cp1252'
|
||||
use_embedded_content = False
|
||||
language = 'es'
|
||||
remove_empty_feeds = True
|
||||
masthead_url = u'http://www.ideal.es/img/rd.logotipo2_ideal.gif'
|
||||
cover_url = u'http://www.ideal.es/granada/noticias/201112/24/Media/Granada/portada--647x894.JPG'
|
||||
extra_css = u' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .photo-caption{font-size: x-small} '
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
keep_only_tags = [
|
||||
dict(attrs={'id':'title'})
|
||||
,dict(attrs={'class':['overhead','headline','subhead','date','text','noticia_cont','desarrollo']})
|
||||
]
|
||||
|
||||
remove_tags = [dict(name='ul')]
|
||||
|
||||
remove_attributes = ['width','height']
|
||||
|
||||
feeds = [
|
||||
(u'Última Hora' , u'http://www.ideal.es/almeria/rss/feeds/ultima.xml' )
|
||||
,(u'Portada' , u'http://www.ideal.es/almeria/portada.xml' )
|
||||
,(u'Local' , u'http://www.ideal.es/almeria/rss/feeds/granada.xml' )
|
||||
,(u'Deportes' , u'http://www.ideal.es/almeria/rss/feeds/deportes.xml' )
|
||||
,(u'Sociedad' , u'http://www.ideal.es/almeria/rss/feeds/sociedad.xml' )
|
||||
,(u'Cultura' , u'http://www.ideal.es/almeria/rss/feeds/cultura.xml' )
|
||||
,(u'Economía' , u'http://www.ideal.es/almeria/rss/feeds/economia.xml' )
|
||||
,(u'Costa' , u'http://www.ideal.es/almeria/rss/feeds/costa.xml' )
|
||||
,(u'Puerta Purchena' , u'http://www.ideal.es/almeria/rss/feeds/puerta_purchena.xml' )
|
||||
,(u'Andalucía' , u'http://www.ideal.es/almeria/rss/feeds/andalucia.xml' )
|
||||
,(u'España' , u'http://www.ideal.es/almeria/rss/feeds/espana.xml' )
|
||||
,(u'Mundo' , u'http://www.ideal.es/almeria/rss/feeds/internacional.xml' )
|
||||
,(u'Vivir' , u'http://www.ideal.es/almeria/rss/feeds/vivir.xml' )
|
||||
,(u'Opinión' , u'http://www.ideal.es/almeria/rss/feeds/opinion.xml' )
|
||||
,(u'Televisión' , u'http://www.ideal.es/almeria/rss/feeds/television.xml' )
|
||||
,(u'Contraportada' , u'http://www.ideal.es/almeria/rss/feeds/contraportada.xml' )
|
||||
]
|
||||
|
69
recipes/ideal_granada.recipe
Normal file
69
recipes/ideal_granada.recipe
Normal file
@ -0,0 +1,69 @@
|
||||
# encoding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'Josemi Liébana <office at josemi-liebana.com>'
|
||||
__copyright__ = 'Josemi Liébana'
|
||||
__version__ = 'v0.1'
|
||||
__date__ = '5 January 2012'
|
||||
|
||||
|
||||
'''
|
||||
www.ideal.es
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Ideal(BasicNewsRecipe):
|
||||
title = u'Ideal (Edición Granada)'
|
||||
__author__ = u'Josemi Liébana'
|
||||
description = u'Noticias de Granada y el resto del mundo'
|
||||
publisher = 'Ideal'
|
||||
category = 'News, Politics, Spain, Granada'
|
||||
publication_type = 'Newspaper'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 200
|
||||
no_stylesheets = True
|
||||
encoding = 'cp1252'
|
||||
use_embedded_content = False
|
||||
language = 'es'
|
||||
remove_empty_feeds = True
|
||||
masthead_url = 'http://www.ideal.es/img/rd.logotipo2_ideal.gif'
|
||||
cover_url = 'http://www.ideal.es/granada/noticias/201112/24/Media/Granada/portada--647x894.JPG'
|
||||
extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .photo-caption{font-size: x-small} '
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
keep_only_tags = [
|
||||
dict(attrs={'id':'title'})
|
||||
,dict(attrs={'class':['overhead','headline','subhead','date','text','noticia_cont','desarrollo']})
|
||||
]
|
||||
|
||||
remove_tags = [dict(name='ul')]
|
||||
|
||||
remove_attributes = ['width','height']
|
||||
|
||||
feeds = [
|
||||
(u'Última Hora' , u'http://www.ideal.es/granada/rss/feeds/ultima.xml' )
|
||||
,(u'Portada' , u'http://www.ideal.es/granada/portada.xml' )
|
||||
,(u'Local' , u'http://www.ideal.es/granada/rss/feeds/granada.xml' )
|
||||
,(u'Deportes' , u'http://www.ideal.es/granada/rss/feeds/deportes.xml' )
|
||||
,(u'Sociedad' , u'http://www.ideal.es/granada/rss/feeds/sociedad.xml' )
|
||||
,(u'Cultura' , u'http://www.ideal.es/granada/rss/feeds/cultura.xml' )
|
||||
,(u'Economía' , u'http://www.ideal.es/granada/rss/feeds/economia.xml' )
|
||||
,(u'Costa' , u'http://www.ideal.es/granada/rss/feeds/costa.xml' )
|
||||
,(u'La Carrera' , u'http://www.ideal.es/granada/rss/feeds/la_carrera.xml' )
|
||||
,(u'Puerta Real' , u'http://www.ideal.es/granada/rss/feeds/puerta_real.xml' )
|
||||
,(u'Andalucía' , u'http://www.ideal.es/granada/rss/feeds/andalucia.xml' )
|
||||
,(u'España' , u'http://www.ideal.es/granada/rss/feeds/espana.xml' )
|
||||
,(u'Mundo' , u'http://www.ideal.es/granada/rss/feeds/internacional.xml' )
|
||||
,(u'Vivir' , u'http://www.ideal.es/granada/rss/feeds/vivir.xml' )
|
||||
,(u'Opinión' , u'http://www.ideal.es/granada/rss/feeds/opinion.xml' )
|
||||
,(u'Televisión' , u'http://www.ideal.es/granada/rss/feeds/television.xml' )
|
||||
,(u'Contraportada' , u'http://www.ideal.es/granada/rss/feeds/contraportada.xml' )
|
||||
]
|
||||
|
67
recipes/ideal_jaen.recipe
Normal file
67
recipes/ideal_jaen.recipe
Normal file
@ -0,0 +1,67 @@
|
||||
# encoding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'Josemi Liébana <office at josemi-liebana.com>'
|
||||
__copyright__ = 'Josemi Liébana'
|
||||
__version__ = 'v0.1'
|
||||
__date__ = '5 January 2012'
|
||||
|
||||
|
||||
'''
|
||||
www.ideal.es
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Ideal(BasicNewsRecipe):
|
||||
title = u'Ideal (Edición Jaén)'
|
||||
__author__ = u'Josemi Liébana'
|
||||
description = u'Noticias de Jaén y el resto del mundo'
|
||||
publisher = 'Ideal'
|
||||
category = u'News, Politics, Spain, Jaén'
|
||||
publication_type = 'Newspaper'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 200
|
||||
no_stylesheets = True
|
||||
encoding = 'cp1252'
|
||||
use_embedded_content = False
|
||||
language = 'es'
|
||||
remove_empty_feeds = True
|
||||
masthead_url = 'http://www.ideal.es/img/rd.logotipo2_ideal.gif'
|
||||
cover_url = 'http://www.ideal.es/granada/noticias/201112/24/Media/Granada/portada--647x894.JPG'
|
||||
extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} .photo-caption{font-size: x-small} '
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
keep_only_tags = [
|
||||
dict(attrs={'id':'title'})
|
||||
,dict(attrs={'class':['overhead','headline','subhead','date','text','noticia_cont','desarrollo']})
|
||||
]
|
||||
|
||||
remove_tags = [dict(name='ul')]
|
||||
|
||||
remove_attributes = ['width','height']
|
||||
|
||||
feeds = [
|
||||
(u'Última Hora' , u'http://www.ideal.es/jaen/rss/feeds/ultima.xml' )
|
||||
,(u'Portada' , u'http://www.ideal.es/jaen/portada.xml' )
|
||||
,(u'Local' , u'http://www.ideal.es/jaen/rss/feeds/granada.xml' )
|
||||
,(u'Deportes' , u'http://www.ideal.es/jaen/rss/feeds/deportes.xml' )
|
||||
,(u'Sociedad' , u'http://www.ideal.es/jaen/rss/feeds/sociedad.xml' )
|
||||
,(u'Cultura' , u'http://www.ideal.es/jaen/rss/feeds/cultura.xml' )
|
||||
,(u'Economía' , u'http://www.ideal.es/jaen/rss/feeds/economia.xml' )
|
||||
,(u'Costa' , u'http://www.ideal.es/jaen/rss/feeds/costa.xml' )
|
||||
,(u'Andalucía' , u'http://www.ideal.es/jaen/rss/feeds/andalucia.xml' )
|
||||
,(u'España' , u'http://www.ideal.es/jaen/rss/feeds/espana.xml' )
|
||||
,(u'Mundo' , u'http://www.ideal.es/jaen/rss/feeds/internacional.xml' )
|
||||
,(u'Vivir' , u'http://www.ideal.es/jaen/rss/feeds/vivir.xml' )
|
||||
,(u'Opinión' , u'http://www.ideal.es/jaen/rss/feeds/opinion.xml' )
|
||||
,(u'Televisión' , u'http://www.ideal.es/jaen/rss/feeds/television.xml' )
|
||||
,(u'Contraportada' , u'http://www.ideal.es/jaen/rss/feeds/contraportada.xml' )
|
||||
]
|
||||
|
@ -1,63 +1,30 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Derry FitzGerald'
|
||||
'''
|
||||
iht.com
|
||||
'''
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
|
||||
class NYTimesGlobal(BasicNewsRecipe):
|
||||
title = u'NY Times Global'
|
||||
language = 'en'
|
||||
__author__ = 'Krittika Goyal'
|
||||
oldest_article = 1 #days
|
||||
max_articles_per_feed = 25
|
||||
use_embedded_content = False
|
||||
|
||||
class InternationalHeraldTribune(BasicNewsRecipe):
|
||||
title = u'The International Herald Tribune'
|
||||
__author__ = 'Derry FitzGerald'
|
||||
language = 'en'
|
||||
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 30
|
||||
no_stylesheets = True
|
||||
auto_cleanup = True
|
||||
|
||||
remove_tags = [dict(name='div', attrs={'class':['footer','header']}),
|
||||
dict(name=['form'])]
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'<!-- webtrends.*', re.DOTALL),
|
||||
lambda m:'</body></html>')
|
||||
]
|
||||
extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'
|
||||
|
||||
remove_empty_feeds = True
|
||||
|
||||
feeds = [
|
||||
(u'Frontpage', u'http://www.iht.com/rss/frontpage.xml'),
|
||||
(u'Business', u'http://www.iht.com/rss/business.xml'),
|
||||
(u'Americas', u'http://www.iht.com/rss/america.xml'),
|
||||
(u'Europe', u'http://www.iht.com/rss/europe.xml'),
|
||||
(u'Asia', u'http://www.iht.com/rss/asia.xml'),
|
||||
(u'Africa and Middle East', u'http://www.iht.com/rss/africa.xml'),
|
||||
(u'Opinion', u'http://www.iht.com/rss/opinion.xml'),
|
||||
(u'Technology', u'http://www.iht.com/rss/technology.xml'),
|
||||
(u'Health and Science', u'http://www.iht.com/rss/healthscience.xml'),
|
||||
(u'Sports', u'http://www.iht.com/rss/sports.xml'),
|
||||
(u'Culture', u'http://www.iht.com/rss/arts.xml'),
|
||||
(u'Style and Design', u'http://www.iht.com/rss/style.xml'),
|
||||
(u'Travel', u'http://www.iht.com/rss/travel.xml'),
|
||||
(u'At Home Abroad', u'http://www.iht.com/rss/athome.xml'),
|
||||
(u'Your Money', u'http://www.iht.com/rss/yourmoney.xml'),
|
||||
(u'Properties', u'http://www.iht.com/rss/properties.xml')
|
||||
]
|
||||
temp_files = []
|
||||
articles_are_obfuscated = True
|
||||
|
||||
masthead_url = 'http://graphics8.nytimes.com/images/misc/iht-masthead-logo.gif'
|
||||
|
||||
def get_obfuscated_article(self, url):
|
||||
br = self.get_browser()
|
||||
br.open(url)
|
||||
response1 = br.follow_link(url_regex=re.compile(r'.*pagewanted=print.*'))
|
||||
html = response1.read()
|
||||
|
||||
self.temp_files.append(PersistentTemporaryFile('_iht.html'))
|
||||
self.temp_files[-1].write(html)
|
||||
self.temp_files[-1].close()
|
||||
return self.temp_files[-1].name
|
||||
('NYTimes',
|
||||
'http://www.nytimes.com/services/xml/rss/nyt/HomePage.xml'),
|
||||
('NYTimes global',
|
||||
'http://www.nytimes.com/services/xml/rss/nyt/GlobalHome.xml'),
|
||||
('World',
|
||||
'http://www.nytimes.com/services/xml/rss/nyt/World.xml'),
|
||||
('U.S.',
|
||||
'http://www.nytimes.com/services/xml/rss/nyt/US.xml'),
|
||||
('Business',
|
||||
'http://feeds.nytimes.com/nyt/rss/Business'),
|
||||
('Sports',
|
||||
'http://www.nytimes.com/services/xml/rss/nyt/Sports.xml'),
|
||||
('Technology',
|
||||
'http://feeds.nytimes.com/nyt/rss/Technology'),
|
||||
]
|
||||
|
12
recipes/iktibas.recipe
Normal file
12
recipes/iktibas.recipe
Normal file
@ -0,0 +1,12 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class BasicUserRecipe1324739406(BasicNewsRecipe):
|
||||
title = u'\u0130ktibas'
|
||||
language = 'tr'
|
||||
__author__ = 'asalet_r'
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 20
|
||||
auto_cleanup = True
|
||||
|
||||
feeds = [(u'\u0130ktibas', u'http://www.iktibasdergisi.com/rss/rss.xml')]
|
@ -1,16 +1,20 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1234144423(BasicNewsRecipe):
|
||||
title = u'Indianapolis Star'
|
||||
oldest_article = 5
|
||||
language = 'en'
|
||||
class IndianapolisStar(BasicNewsRecipe):
|
||||
title = u'Indianapolis Star'
|
||||
oldest_article = 10
|
||||
auto_cleanup = True
|
||||
language = 'en'
|
||||
__author__ = 'Owen Kelly'
|
||||
max_articles_per_feed = 100
|
||||
cover_url = u'http://www2.indystar.com/frontpage/images/today.jpg'
|
||||
feeds = [(u'Community Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=LOCAL&template=rss'),
|
||||
(u'News Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=NEWS&template=rss'),
|
||||
(u'Business Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=BUSINESS&template=rss'),
|
||||
(u'Politics and Government', u'http://www.indystar.com/apps/pbcs.dll/section?Category=NEWS05&template=rss'),
|
||||
(u'Lifestyle Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=LIVING&template=rss&mime=XML'),
|
||||
(u'Opinion Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=OPINION&template=rss&mime=XML')
|
||||
]
|
||||
|
||||
__author__ = 'Owen Kelly'
|
||||
max_articles_per_feed = 100
|
||||
|
||||
cover_url = u'http://www2.indystar.com/frontpage/images/today.jpg'
|
||||
|
||||
feeds = [(u'Community Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=LOCAL&template=rss&mime=XML'), (u'News Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=NEWS&template=rss&mime=XML'), (u'Business Headlines', u'http://www..indystar.com/apps/pbcs.dll/section?Category=BUSINESS&template=rss&mime=XML'), (u'Sports Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=SPORTS&template=rss&mime=XML'), (u'Lifestyle Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=LIVING&template=rss&mime=XML'), (u'Opinion Headlines', u'http://www.indystar.com/apps/pbcs.dll/section?Category=OPINION&template=rss&mime=XML')]
|
||||
|
||||
def print_version(self, url):
|
||||
return url + '&template=printart'
|
||||
def print_version(self, url):
|
||||
return url + '&template=printart'
|
||||
|
12
recipes/izdiham.com.recipe
Normal file
12
recipes/izdiham.com.recipe
Normal file
@ -0,0 +1,12 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class BasicUserRecipe1324158549(BasicNewsRecipe):
|
||||
title = u'izdiham.com'
|
||||
language = 'tr'
|
||||
__author__ = 'asalet_r'
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 20
|
||||
auto_cleanup = True
|
||||
|
||||
feeds = [(u'\u0130zdiham', u'http://www.izdiham.com/index.php/feed')]
|
@ -1,79 +1,79 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Attis <attis@attis.one.pl>'
|
||||
__copyright__ = '2011 Attis <attis@attis.one.pl>, 2012 Tomasz Długosz <tomek3d@gmail.com>'
|
||||
__version__ = 'v. 0.1'
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class KopalniaWiedzy(BasicNewsRecipe):
|
||||
title = u'Kopalnia Wiedzy'
|
||||
publisher = u'Kopalnia Wiedzy'
|
||||
description = u'Ciekawostki ze świata nauki i techniki'
|
||||
encoding = 'utf-8'
|
||||
__author__ = 'Attis'
|
||||
language = 'pl'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
INDEX = u'http://kopalniawiedzy.pl/'
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
title = u'Kopalnia Wiedzy'
|
||||
publisher = u'Kopalnia Wiedzy'
|
||||
description = u'Ciekawostki ze świata nauki i techniki'
|
||||
encoding = 'utf-8'
|
||||
__author__ = 'Attis & Tomasz Długosz'
|
||||
language = 'pl'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
INDEX = u'http://kopalniawiedzy.pl/'
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
|
||||
remove_tags = [{'name':'p', 'attrs': {'class': 'keywords'} }, {'name':'div', 'attrs': {'class':'sexy-bookmarks sexy-bookmarks-bg-caring'}}]
|
||||
remove_tags_after = dict(attrs={'class':'ad-square'})
|
||||
keep_only_tags = [dict(name="div", attrs={'id':'articleContent'})]
|
||||
extra_css = '.topimage {margin-top: 30px}'
|
||||
remove_tags = [{'name':'p', 'attrs': {'class': 'keywords'}}, {'name':'div', 'attrs': {'class':'sexy-bookmarks sexy-bookmarks-bg-caring'}}, {'name':'div', 'attrs': {'class':'article-time-and-cat'}}, {'name':'p', 'attrs': {'class':'tags'}}]
|
||||
remove_tags_after = dict(attrs={'class':'ad-square'})
|
||||
keep_only_tags = [dict(name="div", attrs={'class':'article-text text-small'})]
|
||||
extra_css = '.topimage {margin-top: 30px}'
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile(u'<a .* rel="lightboxText" .*><img (.*)></a>'),
|
||||
lambda match: '<img class="topimage" ' + match.group(1) + '>' ),
|
||||
(re.compile(u'<br /><br />'),
|
||||
lambda match: '<br\/>')
|
||||
]
|
||||
preprocess_regexps = [
|
||||
(re.compile(u'<a .* rel="lightboxText" .*><img (.*)></a>'),
|
||||
lambda match: '<img class="topimage" ' + match.group(1) + '>' ),
|
||||
(re.compile(u'<br /><br />'),
|
||||
lambda match: '<br\/>')
|
||||
]
|
||||
|
||||
feeds = [
|
||||
(u'Biologia', u'http://kopalniawiedzy.pl/wiadomosci_biologia.rss'),
|
||||
(u'Medycyna', u'http://kopalniawiedzy.pl/wiadomosci_medycyna.rss'),
|
||||
(u'Psychologia', u'http://kopalniawiedzy.pl/wiadomosci_psychologia.rss'),
|
||||
(u'Technologie', u'http://kopalniawiedzy.pl/wiadomosci_technologie.rss'),
|
||||
(u'Ciekawostki', u'http://kopalniawiedzy.pl/wiadomosci_ciekawostki.rss'),
|
||||
(u'Artykuły', u'http://kopalniawiedzy.pl/artykuly.rss')
|
||||
]
|
||||
feeds = [
|
||||
(u'Biologia', u'http://kopalniawiedzy.pl/wiadomosci_biologia.rss'),
|
||||
(u'Medycyna', u'http://kopalniawiedzy.pl/wiadomosci_medycyna.rss'),
|
||||
(u'Psychologia', u'http://kopalniawiedzy.pl/wiadomosci_psychologia.rss'),
|
||||
(u'Technologie', u'http://kopalniawiedzy.pl/wiadomosci_technologie.rss'),
|
||||
(u'Ciekawostki', u'http://kopalniawiedzy.pl/wiadomosci_ciekawostki.rss'),
|
||||
(u'Artykuły', u'http://kopalniawiedzy.pl/artykuly.rss')
|
||||
]
|
||||
|
||||
def is_link_wanted(self, url, tag):
|
||||
return tag['class'] == 'next'
|
||||
def is_link_wanted(self, url, tag):
|
||||
return tag['class'] == 'next'
|
||||
|
||||
def remove_beyond(self, tag, next):
|
||||
while tag is not None and getattr(tag, 'name', None) != 'body':
|
||||
after = getattr(tag, next)
|
||||
while after is not None:
|
||||
ns = getattr(tag, next)
|
||||
after.extract()
|
||||
after = ns
|
||||
tag = tag.parent
|
||||
def remove_beyond(self, tag, next):
|
||||
while tag is not None and getattr(tag, 'name', None) != 'body':
|
||||
after = getattr(tag, next)
|
||||
while after is not None:
|
||||
ns = getattr(tag, next)
|
||||
after.extract()
|
||||
after = ns
|
||||
tag = tag.parent
|
||||
|
||||
def append_page(self, soup, appendtag, position):
|
||||
pager = soup.find('a',attrs={'class':'next'})
|
||||
if pager:
|
||||
nexturl = self.INDEX + pager['href']
|
||||
soup2 = self.index_to_soup(nexturl)
|
||||
texttag = soup2.find('div', attrs={'id':'articleContent'})
|
||||
def append_page(self, soup, appendtag, position):
|
||||
pager = soup.find('a',attrs={'class':'next'})
|
||||
if pager:
|
||||
nexturl = self.INDEX + pager['href']
|
||||
soup2 = self.index_to_soup(nexturl)
|
||||
texttag = soup2.find('div', attrs={'id':'articleContent'})
|
||||
|
||||
tag = texttag.find(attrs={'class':'pages'})
|
||||
self.remove_beyond(tag, 'nextSibling')
|
||||
tag = texttag.find(attrs={'class':'pages'})
|
||||
self.remove_beyond(tag, 'nextSibling')
|
||||
|
||||
newpos = len(texttag.contents)
|
||||
self.append_page(soup2,texttag,newpos)
|
||||
newpos = len(texttag.contents)
|
||||
self.append_page(soup2,texttag,newpos)
|
||||
|
||||
appendtag.insert(position,texttag)
|
||||
appendtag.insert(position,texttag)
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
self.append_page(soup, soup.body, 3)
|
||||
def preprocess_html(self, soup):
|
||||
self.append_page(soup, soup.body, 3)
|
||||
|
||||
for item in soup.findAll('div',attrs={'class':'pages'}):
|
||||
item.extract()
|
||||
for item in soup.findAll('div',attrs={'class':'pages'}):
|
||||
item.extract()
|
||||
|
||||
for item in soup.findAll('p', attrs={'class':'wykop'}):
|
||||
item.extract()
|
||||
for item in soup.findAll('p', attrs={'class':'wykop'}):
|
||||
item.extract()
|
||||
|
||||
return soup
|
||||
return soup
|
||||
|
@ -1,10 +1,9 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.la-razon.com
|
||||
'''
|
||||
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class LaRazon_Bol(BasicNewsRecipe):
|
||||
@ -16,19 +15,17 @@ class LaRazon_Bol(BasicNewsRecipe):
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 200
|
||||
no_stylesheets = True
|
||||
encoding = 'cp1252'
|
||||
encoding = 'utf8'
|
||||
use_embedded_content = False
|
||||
language = 'es_BO'
|
||||
publication_type = 'newspaper'
|
||||
delay = 1
|
||||
remove_empty_feeds = True
|
||||
cover_url = strftime('http://www.la-razon.com/portadas/%Y%m%d_LaRazon.jpg')
|
||||
masthead_url = 'http://www.la-razon.com/imagenes/logo.jpg'
|
||||
extra_css = """ body{font-family: Arial,Helvetica,sans-serif }
|
||||
img{margin-bottom: 0.4em}
|
||||
.noticia-titulo{font-family: Georgia,"Times New Roman",Times,serif}
|
||||
.lead{font-weight: bold; font-size: 0.8em}
|
||||
"""
|
||||
masthead_url = 'http://www.la-razon.com/static/LRZRazon/images/lrz-logo.png'
|
||||
extra_css = """ body{font-family: Georgia,"Times New Roman",Times,serif}
|
||||
img{margin-bottom: 0.4em; display: block}
|
||||
.meta{font-size: small; font-family: Arial,Helvetica,sans-serif}
|
||||
"""
|
||||
INDEX = 'http://www.la-razon.com/'
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
@ -37,28 +34,37 @@ class LaRazon_Bol(BasicNewsRecipe):
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['noticia-titulo','noticia-desarrollo']})]
|
||||
remove_tags = [dict(name=['meta','link','form','iframe','embed','object'])]
|
||||
keep_only_tags = [dict(name='div', attrs={'class':['pg-hd', 'pg-bd']})]
|
||||
remove_tags = [
|
||||
dict(name=['meta','link','form','iframe','embed','object'])
|
||||
,dict(name='div', attrs={'class':'bd'})
|
||||
]
|
||||
remove_attributes = ['width','height']
|
||||
|
||||
feeds = [
|
||||
(u'Editorial' , u'http://www.la-razon.com/rss_editorial.php' )
|
||||
,(u'Opinión' , u'http://www.la-razon.com/rss_opinion.php' )
|
||||
,(u'Nacional' , u'http://www.la-razon.com/rss_nacional.php' )
|
||||
,(u'Economia' , u'http://www.la-razon.com/rss_economia.php' )
|
||||
,(u'Ciudades' , u'http://www.la-razon.com/rss_ciudades.php' )
|
||||
,(u'Sociedad' , u'http://www.la-razon.com/rss_sociedad.php' )
|
||||
,(u'Mundo' , u'http://www.la-razon.com/rss_sociedad.php' )
|
||||
,(u'La Revista' , u'http://www.la-razon.com/rss_larevista.php' )
|
||||
,(u'Sociales' , u'http://www.la-razon.com/rss_sociales.php' )
|
||||
,(u'Mia' , u'http://www.la-razon.com/rss_mia.php' )
|
||||
,(u'Marcas' , u'http://www.la-razon.com/rss_marcas.php' )
|
||||
,(u'Escape' , u'http://www.la-razon.com/rss_escape.php' )
|
||||
,(u'El Financiero' , u'http://www.la-razon.com/rss_financiero.php')
|
||||
,(u'Tendencias' , u'http://www.la-razon.com/rss_tendencias.php')
|
||||
(u'Editorial' , u'http://www.la-razon.com/rss/opinion/editorial/' )
|
||||
,(u'Nacional' , u'http://www.la-razon.com/rss/nacional/' )
|
||||
,(u'Economia' , u'http://www.la-razon.com/rss/economia/' )
|
||||
,(u'Ciudades' , u'http://www.la-razon.com/rss/ciudades/' )
|
||||
,(u'Sociedad' , u'http://www.la-razon.com/rss/sociedad/' )
|
||||
,(u'Mundo' , u'http://www.la-razon.com/rss/mundo/' )
|
||||
,(u'La Revista' , u'http://www.la-razon.com/rss/la_revista/' )
|
||||
,(u'Sociales' , u'http://www.la-razon.com/rss/sociales/' )
|
||||
,(u'Mia' , u'http://www.la-razon.com/rss/suplementos/mia/' )
|
||||
,(u'Marcas' , u'http://www.la-razon.com/rss/marcas/' )
|
||||
,(u'Escape' , u'http://www.la-razon.com/rss/suplementos/escape/' )
|
||||
,(u'El Financiero' , u'http://www.la-razon.com/rss/suplementos/financiero/')
|
||||
,(u'Tendencias' , u'http://www.la-razon.com/rss/suplementos/tendencias/')
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
def get_cover_url(self):
|
||||
soup = self.index_to_soup(self.INDEX)
|
||||
lightbox = soup.find('div', attrs = {'class' : 'lightbox lightbox-frontpage'})
|
||||
return lightbox.img['src']
|
||||
|
||||
|
||||
|
14
recipes/lega_nerd.recipe
Normal file
14
recipes/lega_nerd.recipe
Normal file
@ -0,0 +1,14 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1326135232(BasicNewsRecipe):
|
||||
title = u'Lega Nerd'
|
||||
description = 'nerd / geek culture, pc, comics, music, culture'
|
||||
language = 'it'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
|
||||
feeds = [(u'Lega Nerd', u'http://feeds.feedburner.com/LegaNerd')]
|
||||
__author__ = 'faber1971'
|
||||
__version__ = 'v1.0'
|
||||
__date__ = '9, January 2011'
|
@ -41,7 +41,7 @@ class LosTiempos_Bol(BasicNewsRecipe):
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'articulo'})]
|
||||
remove_tags = [
|
||||
dict(name=['meta','link','form','iframe','embed','object','hr'])
|
||||
,dict(attrs={'class':['caja_fonts sin_border_bot','pub']})
|
||||
,dict(attrs={'class':['caja_fonts sin_border_bot','pub','twitter-share-button']})
|
||||
]
|
||||
remove_attributes = ['width','height']
|
||||
|
||||
|
@ -14,8 +14,11 @@ class WeeklyLWN(BasicNewsRecipe):
|
||||
description = 'Weekly summary of what has happened in the free software world.'
|
||||
__author__ = 'Davide Cavalca'
|
||||
language = 'en'
|
||||
site_url = 'http://lwn.net'
|
||||
|
||||
cover_url = 'http://lwn.net/images/lcorner.png'
|
||||
extra_css = 'pre,code,samp,kbd,tt { font-size: 80% }\nblockquote {margin-left:0 }\n* { color: black }\n'
|
||||
|
||||
cover_url = site_url + '/images/lcorner.png'
|
||||
#masthead_url = 'http://lwn.net/images/lcorner.png'
|
||||
publication_type = 'magazine'
|
||||
|
||||
@ -43,11 +46,29 @@ class WeeklyLWN(BasicNewsRecipe):
|
||||
br.submit()
|
||||
return br
|
||||
|
||||
def print_version(self, url):
|
||||
|
||||
# Strip off anchor
|
||||
url = url.split('#')[0]
|
||||
|
||||
# Prepend site_url
|
||||
if url[0:len(self.site_url)] != self.site_url:
|
||||
url = self.site_url + url
|
||||
|
||||
# Append printable URL parameter
|
||||
print_param = '?format=printable'
|
||||
if url[-len(print_param):] != print_param:
|
||||
url += print_param
|
||||
|
||||
#import sys
|
||||
#print >>sys.stderr, "*** print_version(url):", url
|
||||
return url
|
||||
|
||||
def parse_index(self):
|
||||
if self.username is not None and self.password is not None:
|
||||
index_url = 'http://lwn.net/current/bigpage?format=printable'
|
||||
index_url = self.print_version('/current/bigpage')
|
||||
else:
|
||||
index_url = 'http://lwn.net/free/bigpage?format=printable'
|
||||
index_url = self.print_version('/free/bigpage')
|
||||
soup = self.index_to_soup(index_url)
|
||||
body = soup.body
|
||||
|
||||
@ -56,19 +77,19 @@ class WeeklyLWN(BasicNewsRecipe):
|
||||
url_re = re.compile('^/Articles/')
|
||||
|
||||
while True:
|
||||
tag_title = body.findNext(name='p', attrs={'class':'SummaryHL'})
|
||||
tag_title = body.findNext(attrs={'class':'SummaryHL'})
|
||||
if tag_title == None:
|
||||
break
|
||||
|
||||
tag_section = tag_title.findPrevious(name='p', attrs={'class':'Cat1HL'})
|
||||
tag_section = tag_title.findPrevious(attrs={'class':'Cat1HL'})
|
||||
if tag_section == None:
|
||||
section = 'Front Page'
|
||||
else:
|
||||
section = tag_section.string
|
||||
|
||||
tag_section2 = tag_title.findPrevious(name='p', attrs={'class':'Cat2HL'})
|
||||
tag_section2 = tag_title.findPrevious(attrs={'class':'Cat2HL'})
|
||||
if tag_section2 != None:
|
||||
if tag_section2.findPrevious(name='p', attrs={'class':'Cat1HL'}) == tag_section:
|
||||
if tag_section2.findPrevious(attrs={'class':'Cat1HL'}) == tag_section:
|
||||
section = "%s: %s" %(section, tag_section2.string)
|
||||
|
||||
if section not in articles.keys():
|
||||
@ -94,9 +115,10 @@ class WeeklyLWN(BasicNewsRecipe):
|
||||
if tag_url == None:
|
||||
break
|
||||
|
||||
|
||||
article = dict(
|
||||
title=self.tag_to_string(tag_title),
|
||||
url= 'http://lwn.net' + tag_url['href'].split('#')[0] + '?format=printable',
|
||||
url=tag_url['href'],
|
||||
description='', content='', date='')
|
||||
articles[section].append(article)
|
||||
|
||||
|
23
recipes/macity.recipe
Normal file
23
recipes/macity.recipe
Normal file
@ -0,0 +1,23 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1325766771(BasicNewsRecipe):
|
||||
title = u'Macity'
|
||||
language = 'it'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
|
||||
def get_article_url(self, article):
|
||||
link = BasicNewsRecipe.get_article_url(self, article)
|
||||
if link.split('/')[-1]=="story01.htm":
|
||||
link=link.split('/')[-2]
|
||||
a=['A', 'B', 'C', 'D', 'E', 'F', 'G', 'I', 'L' , 'N' , 'S' ]
|
||||
b=['0', '.', '/', '?', '-', '=', '&', '_', 'http://', '.com', 'www.']
|
||||
for i in range(0,len(a)):
|
||||
link=link.replace('0'+a[-i],b[-i])
|
||||
return link
|
||||
|
||||
feeds = [(u'Macity', u'http://www.macitynet.it.feedsportal.com/c/33714/f/599513/index.rss')]
|
||||
__author__ = 'faber1971'
|
||||
description = 'Apple and hi-tech news'
|
||||
|
@ -10,6 +10,10 @@ __MakePeriodical__ = True
|
||||
__UseChineseTitle__ = False
|
||||
# Set it to False if you want to skip images (Default: True)
|
||||
__KeepImages__ = True
|
||||
# Set it to True if you want to include a summary in Kindle's article view (Default: False)
|
||||
__IncludeSummary__ = False
|
||||
# Set it to True if you want thumbnail images in Kindle's article view (Default: True)
|
||||
__IncludeThumbnails__ = True
|
||||
# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source (Default: True)
|
||||
__UseLife__ = True
|
||||
# (HK only) It is to disable premium content (Default: False)
|
||||
@ -24,6 +28,9 @@ __Date__ = ''
|
||||
|
||||
'''
|
||||
Change Log:
|
||||
2011/12/18: update the overridden create_odf(.) routine with the one from Calibre version 0.8.31. Move __UseChineseTitle__ usage away
|
||||
from create_odf(.). Optional support of text_summary and thumbnail images in Kindle's article view. Start new day
|
||||
download of Hong Kong Mingpao at 4.30am. Set the actual publication date shown on kindle device.
|
||||
2011/12/01: take care of situation that in txt source parsing, the article content does start with special character u'\u3010'
|
||||
2011/10/21: fix a bug that hi-res img is unavailable in pages parsed from source txt
|
||||
2011/10/19: fix a bug in txt source parsing
|
||||
@ -53,6 +60,7 @@ Change Log:
|
||||
2010/10/31: skip repeated articles in section pages
|
||||
'''
|
||||
|
||||
from calibre.utils.date import now as nowf
|
||||
import os, datetime, re, mechanize
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from contextlib import nested
|
||||
@ -60,11 +68,15 @@ from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.ebooks.metadata.toc import TOC
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre.utils.localization import canonicalize_lang
|
||||
|
||||
# MAIN CLASS
|
||||
class MPRecipe(BasicNewsRecipe):
|
||||
if __Region__ == 'Hong Kong':
|
||||
title = 'Ming Pao - Hong Kong'
|
||||
if __UseChineseTitle__ == True:
|
||||
title = u'\u660e\u5831 (\u9999\u6e2f)'
|
||||
else:
|
||||
title = 'Ming Pao - Hong Kong'
|
||||
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
|
||||
category = 'Chinese, News, Hong Kong'
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
|
||||
@ -109,7 +121,10 @@ class MPRecipe(BasicNewsRecipe):
|
||||
lambda match: "</b>")
|
||||
]
|
||||
elif __Region__ == 'Vancouver':
|
||||
title = 'Ming Pao - Vancouver'
|
||||
if __UseChineseTitle__ == True:
|
||||
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
|
||||
else:
|
||||
title = 'Ming Pao - Vancouver'
|
||||
description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
|
||||
category = 'Chinese, News, Vancouver'
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
|
||||
@ -127,7 +142,10 @@ class MPRecipe(BasicNewsRecipe):
|
||||
lambda match: ''),
|
||||
]
|
||||
elif __Region__ == 'Toronto':
|
||||
title = 'Ming Pao - Toronto'
|
||||
if __UseChineseTitle__ == True:
|
||||
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
|
||||
else:
|
||||
title = 'Ming Pao - Toronto'
|
||||
description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
|
||||
category = 'Chinese, News, Toronto'
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
|
||||
@ -161,9 +179,9 @@ class MPRecipe(BasicNewsRecipe):
|
||||
def get_dtlocal(self):
|
||||
dt_utc = datetime.datetime.utcnow()
|
||||
if __Region__ == 'Hong Kong':
|
||||
# convert UTC to local hk time - at HKT 5.30am, all news are available
|
||||
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(5.5/24)
|
||||
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(5.5/24)
|
||||
# convert UTC to local hk time - at HKT 4.30am, all news are available
|
||||
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(4.5/24)
|
||||
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(4.5/24)
|
||||
elif __Region__ == 'Vancouver':
|
||||
# convert UTC to local Vancouver time - at PST time 5.30am, all news are available
|
||||
dt_local = dt_utc + datetime.timedelta(-8.0/24) - datetime.timedelta(5.5/24)
|
||||
@ -186,6 +204,18 @@ class MPRecipe(BasicNewsRecipe):
|
||||
else:
|
||||
return self.get_dtlocal().strftime("%Y-%m-%d")
|
||||
|
||||
def get_fetchyear(self):
|
||||
if __Date__ <> '':
|
||||
return __Date__[0:4]
|
||||
else:
|
||||
return self.get_dtlocal().strftime("%Y")
|
||||
|
||||
def get_fetchmonth(self):
|
||||
if __Date__ <> '':
|
||||
return __Date__[4:6]
|
||||
else:
|
||||
return self.get_dtlocal().strftime("%m")
|
||||
|
||||
def get_fetchday(self):
|
||||
if __Date__ <> '':
|
||||
return __Date__[6:8]
|
||||
@ -654,77 +684,153 @@ class MPRecipe(BasicNewsRecipe):
|
||||
del item['absmiddle']
|
||||
return soup
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
|
||||
# thumbnails shouldn't be available if using hi-res images
|
||||
if __IncludeThumbnails__ and __HiResImg__ == False and first and hasattr(self, 'add_toc_thumbnail'):
|
||||
img = soup.find('img')
|
||||
if img is not None:
|
||||
self.add_toc_thumbnail(article, img['src'])
|
||||
|
||||
try:
|
||||
if __IncludeSummary__ and len(article.text_summary.strip()) == 0:
|
||||
# look for content
|
||||
articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
|
||||
if not articlebodies:
|
||||
articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
|
||||
if not articlebodies:
|
||||
articlebodies = soup.findAll('div',attrs={'class':'content'})
|
||||
if not articlebodies:
|
||||
articlebodies = soup.findAll('div', attrs={'id':'font'})
|
||||
if articlebodies:
|
||||
for articlebody in articlebodies:
|
||||
if articlebody:
|
||||
# the text may or may not be enclosed in <p></p> tag
|
||||
paras = articlebody.findAll('p')
|
||||
if not paras:
|
||||
paras = articlebody
|
||||
textFound = False
|
||||
for p in paras:
|
||||
if not textFound:
|
||||
summary_candidate = self.tag_to_string(p).strip()
|
||||
summary_candidate = summary_candidate.replace(u'\u3010\u660e\u5831\u5c08\u8a0a\u3011', '', 1)
|
||||
if len(summary_candidate) > 0:
|
||||
article.summary = article.text_summary = summary_candidate
|
||||
textFound = True
|
||||
else:
|
||||
# display a simple text
|
||||
#article.summary = article.text_summary = u'\u66f4\u591a......'
|
||||
# display word counts
|
||||
counts = 0
|
||||
articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
|
||||
if not articlebodies:
|
||||
articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
|
||||
if not articlebodies:
|
||||
articlebodies = soup.findAll('div',attrs={'class':'content'})
|
||||
if not articlebodies:
|
||||
articlebodies = soup.findAll('div', attrs={'id':'font'})
|
||||
if articlebodies:
|
||||
for articlebody in articlebodies:
|
||||
# the text may or may not be enclosed in <p></p> tag
|
||||
paras = articlebody.findAll('p')
|
||||
if not paras:
|
||||
paras = articlebody
|
||||
for p in paras:
|
||||
summary_candidate = self.tag_to_string(p).strip()
|
||||
counts += len(summary_candidate)
|
||||
article.summary = article.text_summary = u'\uff08' + str(counts) + u'\u5b57\uff09'
|
||||
except:
|
||||
self.log("Error creating article descriptions")
|
||||
return
|
||||
|
||||
# override from the one in version 0.8.31
|
||||
def create_opf(self, feeds, dir=None):
|
||||
if dir is None:
|
||||
dir = self.output_dir
|
||||
if __UseChineseTitle__ == True:
|
||||
if __Region__ == 'Hong Kong':
|
||||
title = u'\u660e\u5831 (\u9999\u6e2f)'
|
||||
elif __Region__ == 'Vancouver':
|
||||
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
|
||||
elif __Region__ == 'Toronto':
|
||||
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
|
||||
else:
|
||||
title = self.short_title()
|
||||
# if not generating a periodical, force date to apply in title
|
||||
if __MakePeriodical__ == False:
|
||||
title = self.short_title()
|
||||
# change 1: allow our own flag to tell if a periodical is to be generated
|
||||
# also use customed date instead of current time
|
||||
if __MakePeriodical__ == False or self.output_profile.periodical_date_in_title:
|
||||
title = title + ' ' + self.get_fetchformatteddate()
|
||||
if True:
|
||||
mi = MetaInformation(title, [self.publisher])
|
||||
mi.publisher = self.publisher
|
||||
mi.author_sort = self.publisher
|
||||
if __MakePeriodical__ == True:
|
||||
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
|
||||
else:
|
||||
mi.publication_type = self.publication_type+':'+self.short_title()
|
||||
#mi.timestamp = nowf()
|
||||
mi.timestamp = self.get_dtlocal()
|
||||
mi.comments = self.description
|
||||
if not isinstance(mi.comments, unicode):
|
||||
mi.comments = mi.comments.decode('utf-8', 'replace')
|
||||
#mi.pubdate = nowf()
|
||||
mi.pubdate = self.get_dtlocal()
|
||||
opf_path = os.path.join(dir, 'index.opf')
|
||||
ncx_path = os.path.join(dir, 'index.ncx')
|
||||
opf = OPFCreator(dir, mi)
|
||||
# Add mastheadImage entry to <guide> section
|
||||
mp = getattr(self, 'masthead_path', None)
|
||||
if mp is not None and os.access(mp, os.R_OK):
|
||||
from calibre.ebooks.metadata.opf2 import Guide
|
||||
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
|
||||
ref.type = 'masthead'
|
||||
ref.title = 'Masthead Image'
|
||||
opf.guide.append(ref)
|
||||
# end of change 1
|
||||
# change 2: __appname__ replaced by newspaper publisher
|
||||
__appname__ = self.publisher
|
||||
mi = MetaInformation(title, [__appname__])
|
||||
mi.publisher = __appname__
|
||||
mi.author_sort = __appname__
|
||||
# change 3: use __MakePeriodical__ flag to tell if a periodical should be generated
|
||||
if __MakePeriodical__ == True:
|
||||
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
|
||||
else:
|
||||
mi.publication_type = self.publication_type+':'+self.short_title()
|
||||
#mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
|
||||
# change 4: in the following, all the nowf() are changed to adjusted time
|
||||
# This one doesn't matter
|
||||
mi.timestamp = nowf()
|
||||
# change 5: skip listing the articles
|
||||
#article_titles, aseen = [], set()
|
||||
#for f in feeds:
|
||||
# for a in f:
|
||||
# if a.title and a.title not in aseen:
|
||||
# aseen.add(a.title)
|
||||
# article_titles.append(force_unicode(a.title, 'utf-8'))
|
||||
|
||||
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
|
||||
manifest.append(os.path.join(dir, 'index.html'))
|
||||
manifest.append(os.path.join(dir, 'index.ncx'))
|
||||
#mi.comments = self.description
|
||||
#if not isinstance(mi.comments, unicode):
|
||||
# mi.comments = mi.comments.decode('utf-8', 'replace')
|
||||
#mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
|
||||
# '\n\n'.join(article_titles))
|
||||
|
||||
# Get cover
|
||||
cpath = getattr(self, 'cover_path', None)
|
||||
if cpath is None:
|
||||
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
|
||||
if self.default_cover(pf):
|
||||
cpath = pf.name
|
||||
if cpath is not None and os.access(cpath, os.R_OK):
|
||||
opf.cover = cpath
|
||||
manifest.append(cpath)
|
||||
language = canonicalize_lang(self.language)
|
||||
if language is not None:
|
||||
mi.language = language
|
||||
# This one affects the pub date shown in kindle title
|
||||
#mi.pubdate = nowf()
|
||||
# now appears to need the time field to be > 12.00noon as well
|
||||
mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
|
||||
opf_path = os.path.join(dir, 'index.opf')
|
||||
ncx_path = os.path.join(dir, 'index.ncx')
|
||||
|
||||
# Get masthead
|
||||
mpath = getattr(self, 'masthead_path', None)
|
||||
if mpath is not None and os.access(mpath, os.R_OK):
|
||||
manifest.append(mpath)
|
||||
opf = OPFCreator(dir, mi)
|
||||
# Add mastheadImage entry to <guide> section
|
||||
mp = getattr(self, 'masthead_path', None)
|
||||
if mp is not None and os.access(mp, os.R_OK):
|
||||
from calibre.ebooks.metadata.opf2 import Guide
|
||||
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
|
||||
ref.type = 'masthead'
|
||||
ref.title = 'Masthead Image'
|
||||
opf.guide.append(ref)
|
||||
|
||||
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
|
||||
manifest.append(os.path.join(dir, 'index.html'))
|
||||
manifest.append(os.path.join(dir, 'index.ncx'))
|
||||
|
||||
# Get cover
|
||||
cpath = getattr(self, 'cover_path', None)
|
||||
if cpath is None:
|
||||
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
|
||||
if self.default_cover(pf):
|
||||
cpath = pf.name
|
||||
if cpath is not None and os.access(cpath, os.R_OK):
|
||||
opf.cover = cpath
|
||||
manifest.append(cpath)
|
||||
|
||||
# Get masthead
|
||||
mpath = getattr(self, 'masthead_path', None)
|
||||
if mpath is not None and os.access(mpath, os.R_OK):
|
||||
manifest.append(mpath)
|
||||
|
||||
opf.create_manifest_from_files_in(manifest)
|
||||
for mani in opf.manifest:
|
||||
if mani.path.endswith('.ncx'):
|
||||
mani.id = 'ncx'
|
||||
if mani.path.endswith('mastheadImage.jpg'):
|
||||
mani.id = 'masthead-image'
|
||||
|
||||
entries = ['index.html']
|
||||
toc = TOC(base_path=dir)
|
||||
self.play_order_counter = 0
|
||||
self.play_order_map = {}
|
||||
|
||||
opf.create_manifest_from_files_in(manifest)
|
||||
for mani in opf.manifest:
|
||||
if mani.path.endswith('.ncx'):
|
||||
mani.id = 'ncx'
|
||||
if mani.path.endswith('mastheadImage.jpg'):
|
||||
mani.id = 'masthead-image'
|
||||
entries = ['index.html']
|
||||
toc = TOC(base_path=dir)
|
||||
self.play_order_counter = 0
|
||||
self.play_order_map = {}
|
||||
|
||||
def feed_index(num, parent):
|
||||
f = feeds[num]
|
||||
@ -739,13 +845,16 @@ class MPRecipe(BasicNewsRecipe):
|
||||
desc = None
|
||||
else:
|
||||
desc = self.description_limiter(desc)
|
||||
tt = a.toc_thumbnail if a.toc_thumbnail else None
|
||||
entries.append('%sindex.html'%adir)
|
||||
po = self.play_order_map.get(entries[-1], None)
|
||||
if po is None:
|
||||
self.play_order_counter += 1
|
||||
po = self.play_order_counter
|
||||
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
|
||||
play_order=po, author=auth, description=desc)
|
||||
parent.add_item('%sindex.html'%adir, None,
|
||||
a.title if a.title else _('Untitled Article'),
|
||||
play_order=po, author=auth,
|
||||
description=desc, toc_thumbnail=tt)
|
||||
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
|
||||
for sp in a.sub_pages:
|
||||
prefix = os.path.commonprefix([opf_path, sp])
|
||||
@ -762,7 +871,7 @@ class MPRecipe(BasicNewsRecipe):
|
||||
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
|
||||
templ = self.navbar.generate(True, num, j, len(f),
|
||||
not self.has_single_feed,
|
||||
a.orig_url, self.publisher, prefix=prefix,
|
||||
a.orig_url, __appname__, prefix=prefix,
|
||||
center=self.center_navbar)
|
||||
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
|
||||
body.insert(len(body.contents), elem)
|
||||
@ -785,7 +894,7 @@ class MPRecipe(BasicNewsRecipe):
|
||||
if not desc:
|
||||
desc = None
|
||||
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
|
||||
f.title, play_order=po, description=desc, author=auth))
|
||||
f.title, play_order=po, description=desc, author=auth))
|
||||
|
||||
else:
|
||||
entries.append('feed_%d/index.html'%0)
|
||||
@ -799,3 +908,4 @@ class MPRecipe(BasicNewsRecipe):
|
||||
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
|
||||
opf.render(opf_file, ncx_file)
|
||||
|
||||
|
||||
|
@ -4,18 +4,41 @@ __copyright__ = '2010-2011, Eddie Lau'
|
||||
# Region - Hong Kong, Vancouver, Toronto
|
||||
__Region__ = 'Toronto'
|
||||
# Users of Kindle 3 with limited system-level CJK support
|
||||
# please replace the following "True" with "False".
|
||||
# please replace the following "True" with "False". (Default: True)
|
||||
__MakePeriodical__ = True
|
||||
# Turn below to true if your device supports display of CJK titles
|
||||
# Turn below to True if your device supports display of CJK titles (Default: False)
|
||||
__UseChineseTitle__ = False
|
||||
# Set it to False if you want to skip images
|
||||
# Set it to False if you want to skip images (Default: True)
|
||||
__KeepImages__ = True
|
||||
# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source
|
||||
# Set it to True if you want to include a summary in Kindle's article view (Default: False)
|
||||
__IncludeSummary__ = False
|
||||
# Set it to True if you want thumbnail images in Kindle's article view (Default: True)
|
||||
__IncludeThumbnails__ = True
|
||||
# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source (Default: True)
|
||||
__UseLife__ = True
|
||||
# (HK only) It is to disable premium content (Default: False)
|
||||
__InclPremium__ = False
|
||||
# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats (Default: True)
|
||||
__ParsePFF__ = True
|
||||
# (HK only) Turn below to True if you wish hi-res images (Default: False)
|
||||
__HiResImg__ = False
|
||||
# Override the date returned by the program if specifying a YYYYMMDD below
|
||||
__Date__ = ''
|
||||
|
||||
|
||||
'''
|
||||
Change Log:
|
||||
2011/12/18: update the overridden create_odf(.) routine with the one from Calibre version 0.8.31. Move __UseChineseTitle__ usage away
|
||||
from create_odf(.). Optional support of text_summary and thumbnail images in Kindle's article view. Start new day
|
||||
download of Hong Kong Mingpao at 4.30am. Set the actual publication date shown on kindle device.
|
||||
2011/12/01: take care of situation that in txt source parsing, the article content does start with special character u'\u3010'
|
||||
2011/10/21: fix a bug that hi-res img is unavailable in pages parsed from source txt
|
||||
2011/10/19: fix a bug in txt source parsing
|
||||
2011/10/17: disable fetching of premium content, also improved txt source parsing
|
||||
2011/10/04: option to get hi-res photos for the articles
|
||||
2011/09/21: fetching "column" section is made optional.
|
||||
2011/09/18: parse "column" section stuff from source text file directly.
|
||||
2011/09/07: disable "column" section as it is no longer offered free.
|
||||
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
|
||||
provide options to remove all images in the file
|
||||
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
|
||||
@ -37,30 +60,38 @@ Change Log:
|
||||
2010/10/31: skip repeated articles in section pages
|
||||
'''
|
||||
|
||||
import os, datetime, re
|
||||
from calibre.utils.date import now as nowf
|
||||
import os, datetime, re, mechanize
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from contextlib import nested
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.ebooks.metadata.toc import TOC
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre.utils.localization import canonicalize_lang
|
||||
|
||||
# MAIN CLASS
|
||||
class MPRecipe(BasicNewsRecipe):
|
||||
if __Region__ == 'Hong Kong':
|
||||
title = 'Ming Pao - Hong Kong'
|
||||
if __UseChineseTitle__ == True:
|
||||
title = u'\u660e\u5831 (\u9999\u6e2f)'
|
||||
else:
|
||||
title = 'Ming Pao - Hong Kong'
|
||||
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
|
||||
category = 'Chinese, News, Hong Kong'
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
|
||||
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
|
||||
keep_only_tags = [dict(name='h1'),
|
||||
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
|
||||
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
|
||||
dict(attrs={'class':['heading']}), # for heading from txt
|
||||
dict(attrs={'id':['newscontent']}), # entertainment and column page content
|
||||
dict(attrs={'id':['newscontent01','newscontent02']}),
|
||||
dict(attrs={'class':['content']}), # for content from txt
|
||||
dict(attrs={'class':['photo']}),
|
||||
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
|
||||
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
|
||||
dict(name='img', attrs={'width':['180'], 'alt':['????']}), # images for source from life.mingpao.com
|
||||
dict(attrs={'class':['images']}) # for images from txt
|
||||
]
|
||||
if __KeepImages__:
|
||||
remove_tags = [dict(name='style'),
|
||||
@ -90,7 +121,10 @@ class MPRecipe(BasicNewsRecipe):
|
||||
lambda match: "</b>")
|
||||
]
|
||||
elif __Region__ == 'Vancouver':
|
||||
title = 'Ming Pao - Vancouver'
|
||||
if __UseChineseTitle__ == True:
|
||||
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
|
||||
else:
|
||||
title = 'Ming Pao - Vancouver'
|
||||
description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
|
||||
category = 'Chinese, News, Vancouver'
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
|
||||
@ -108,7 +142,10 @@ class MPRecipe(BasicNewsRecipe):
|
||||
lambda match: ''),
|
||||
]
|
||||
elif __Region__ == 'Toronto':
|
||||
title = 'Ming Pao - Toronto'
|
||||
if __UseChineseTitle__ == True:
|
||||
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
|
||||
else:
|
||||
title = 'Ming Pao - Toronto'
|
||||
description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
|
||||
category = 'Chinese, News, Toronto'
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
|
||||
@ -139,49 +176,12 @@ class MPRecipe(BasicNewsRecipe):
|
||||
conversion_options = {'linearize_tables':True}
|
||||
timefmt = ''
|
||||
|
||||
def image_url_processor(cls, baseurl, url):
|
||||
# trick: break the url at the first occurance of digit, add an additional
|
||||
# '_' at the front
|
||||
# not working, may need to move this to preprocess_html() method
|
||||
# minIdx = 10000
|
||||
# i0 = url.find('0')
|
||||
# if i0 >= 0 and i0 < minIdx:
|
||||
# minIdx = i0
|
||||
# i1 = url.find('1')
|
||||
# if i1 >= 0 and i1 < minIdx:
|
||||
# minIdx = i1
|
||||
# i2 = url.find('2')
|
||||
# if i2 >= 0 and i2 < minIdx:
|
||||
# minIdx = i2
|
||||
# i3 = url.find('3')
|
||||
# if i3 >= 0 and i0 < minIdx:
|
||||
# minIdx = i3
|
||||
# i4 = url.find('4')
|
||||
# if i4 >= 0 and i4 < minIdx:
|
||||
# minIdx = i4
|
||||
# i5 = url.find('5')
|
||||
# if i5 >= 0 and i5 < minIdx:
|
||||
# minIdx = i5
|
||||
# i6 = url.find('6')
|
||||
# if i6 >= 0 and i6 < minIdx:
|
||||
# minIdx = i6
|
||||
# i7 = url.find('7')
|
||||
# if i7 >= 0 and i7 < minIdx:
|
||||
# minIdx = i7
|
||||
# i8 = url.find('8')
|
||||
# if i8 >= 0 and i8 < minIdx:
|
||||
# minIdx = i8
|
||||
# i9 = url.find('9')
|
||||
# if i9 >= 0 and i9 < minIdx:
|
||||
# minIdx = i9
|
||||
return url
|
||||
|
||||
def get_dtlocal(self):
|
||||
dt_utc = datetime.datetime.utcnow()
|
||||
if __Region__ == 'Hong Kong':
|
||||
# convert UTC to local hk time - at HKT 5.30am, all news are available
|
||||
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(5.5/24)
|
||||
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(5.5/24)
|
||||
# convert UTC to local hk time - at HKT 4.30am, all news are available
|
||||
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(4.5/24)
|
||||
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(4.5/24)
|
||||
elif __Region__ == 'Vancouver':
|
||||
# convert UTC to local Vancouver time - at PST time 5.30am, all news are available
|
||||
dt_local = dt_utc + datetime.timedelta(-8.0/24) - datetime.timedelta(5.5/24)
|
||||
@ -193,13 +193,34 @@ class MPRecipe(BasicNewsRecipe):
|
||||
return dt_local
|
||||
|
||||
def get_fetchdate(self):
|
||||
return self.get_dtlocal().strftime("%Y%m%d")
|
||||
if __Date__ <> '':
|
||||
return __Date__
|
||||
else:
|
||||
return self.get_dtlocal().strftime("%Y%m%d")
|
||||
|
||||
def get_fetchformatteddate(self):
|
||||
return self.get_dtlocal().strftime("%Y-%m-%d")
|
||||
if __Date__ <> '':
|
||||
return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
|
||||
else:
|
||||
return self.get_dtlocal().strftime("%Y-%m-%d")
|
||||
|
||||
def get_fetchyear(self):
|
||||
if __Date__ <> '':
|
||||
return __Date__[0:4]
|
||||
else:
|
||||
return self.get_dtlocal().strftime("%Y")
|
||||
|
||||
def get_fetchmonth(self):
|
||||
if __Date__ <> '':
|
||||
return __Date__[4:6]
|
||||
else:
|
||||
return self.get_dtlocal().strftime("%m")
|
||||
|
||||
def get_fetchday(self):
|
||||
return self.get_dtlocal().strftime("%d")
|
||||
if __Date__ <> '':
|
||||
return __Date__[6:8]
|
||||
else:
|
||||
return self.get_dtlocal().strftime("%d")
|
||||
|
||||
def get_cover_url(self):
|
||||
if __Region__ == 'Hong Kong':
|
||||
@ -230,12 +251,23 @@ class MPRecipe(BasicNewsRecipe):
|
||||
(u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'),
|
||||
(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
|
||||
(u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
|
||||
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'),
|
||||
(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]:
|
||||
articles = self.parse_section2(url, keystr)
|
||||
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
|
||||
]:
|
||||
if __InclPremium__ == True:
|
||||
articles = self.parse_section2_txt(url, keystr)
|
||||
else:
|
||||
articles = self.parse_section2(url, keystr)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
if __InclPremium__ == True:
|
||||
# parse column section articles directly from .txt files
|
||||
for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
|
||||
]:
|
||||
articles = self.parse_section2_txt(url, keystr)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
|
||||
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
@ -244,15 +276,16 @@ class MPRecipe(BasicNewsRecipe):
|
||||
else:
|
||||
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
|
||||
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
|
||||
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
|
||||
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
|
||||
(u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
# special- editorial
|
||||
ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
|
||||
if ed_articles:
|
||||
feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
|
||||
#ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
|
||||
#if ed_articles:
|
||||
# feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
|
||||
|
||||
for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
|
||||
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
|
||||
@ -263,20 +296,39 @@ class MPRecipe(BasicNewsRecipe):
|
||||
|
||||
# special - finance
|
||||
#fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
|
||||
fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
|
||||
if fin_articles:
|
||||
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
|
||||
#fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
|
||||
#if fin_articles:
|
||||
# feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
|
||||
|
||||
for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
|
||||
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
for title, url, keystr in [(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal')]:
|
||||
articles = self.parse_section2_txt(url, keystr)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
#for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
|
||||
# (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
|
||||
# articles = self.parse_section(url)
|
||||
# if articles:
|
||||
# feeds.append((title, articles))
|
||||
|
||||
# special - entertainment
|
||||
ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
|
||||
if ent_articles:
|
||||
feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
|
||||
#ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
|
||||
#if ent_articles:
|
||||
# feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
|
||||
|
||||
for title, url, keystr in [(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
|
||||
]:
|
||||
articles = self.parse_section2_txt(url, keystr)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
if __InclPremium__ == True:
|
||||
# parse column section articles directly from .txt files
|
||||
for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
|
||||
]:
|
||||
articles = self.parse_section2_txt(url, keystr)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
|
||||
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
|
||||
@ -284,11 +336,6 @@ class MPRecipe(BasicNewsRecipe):
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
|
||||
# special- columns
|
||||
col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
|
||||
if col_articles:
|
||||
feeds.append((u'\u5c08\u6b04 Columns', col_articles))
|
||||
elif __Region__ == 'Vancouver':
|
||||
for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
|
||||
(u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
|
||||
@ -332,6 +379,16 @@ class MPRecipe(BasicNewsRecipe):
|
||||
title = self.tag_to_string(a)
|
||||
url = a.get('href', False)
|
||||
url = 'http://news.mingpao.com/' + dateStr + '/' +url
|
||||
# replace the url to the print-friendly version
|
||||
if __ParsePFF__ == True:
|
||||
if url.rfind('Redirect') <> -1 and __InclPremium__ == True:
|
||||
url = re.sub(dateStr + '.*' + dateStr, dateStr, url)
|
||||
url = re.sub('%2F.*%2F', '/', url)
|
||||
title = title.replace(u'\u6536\u8cbb\u5167\u5bb9', '')
|
||||
url = url.replace('%2Etxt', '_print.htm')
|
||||
url = url.replace('%5F', '_')
|
||||
else:
|
||||
url = url.replace('.htm', '_print.htm')
|
||||
if url not in included_urls and url.rfind('Redirect') == -1:
|
||||
current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
|
||||
included_urls.append(url)
|
||||
@ -340,6 +397,8 @@ class MPRecipe(BasicNewsRecipe):
|
||||
|
||||
# parse from life.mingpao.com
|
||||
def parse_section2(self, url, keystr):
|
||||
br = mechanize.Browser()
|
||||
br.set_handle_redirect(False)
|
||||
self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
a = soup.findAll('a', href=True)
|
||||
@ -350,7 +409,29 @@ class MPRecipe(BasicNewsRecipe):
|
||||
title = self.tag_to_string(i)
|
||||
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
|
||||
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
|
||||
url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
|
||||
try:
|
||||
br.open_novisit(url)
|
||||
url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
|
||||
current_articles.append({'title': title, 'url': url, 'description': ''})
|
||||
included_urls.append(url)
|
||||
except:
|
||||
print 'skipping a premium article'
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
|
||||
# parse from text file of life.mingpao.com
|
||||
def parse_section2_txt(self, url, keystr):
|
||||
self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
a = soup.findAll('a', href=True)
|
||||
a.reverse()
|
||||
current_articles = []
|
||||
included_urls = []
|
||||
for i in a:
|
||||
title = self.tag_to_string(i)
|
||||
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
|
||||
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
|
||||
url = url.replace('cfm/dailynews3.cfm?File=', 'ftp/Life3/') # use printed version of the article
|
||||
current_articles.append({'title': title, 'url': url, 'description': ''})
|
||||
included_urls.append(url)
|
||||
current_articles.reverse()
|
||||
@ -438,6 +519,162 @@ class MPRecipe(BasicNewsRecipe):
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
|
||||
# preprocess those .txt and javascript based files
|
||||
def preprocess_raw_html(self, raw_html, url):
|
||||
new_html = raw_html
|
||||
if url.rfind('ftp') <> -1 or url.rfind('_print.htm') <> -1:
|
||||
if url.rfind('_print.htm') <> -1:
|
||||
# javascript based file
|
||||
splitter = re.compile(r'\n')
|
||||
new_raw_html = '<html><head><title>Untitled</title></head>'
|
||||
new_raw_html = new_raw_html + '<body>'
|
||||
for item in splitter.split(raw_html):
|
||||
if item.startswith('var heading1 ='):
|
||||
heading = item.replace('var heading1 = \'', '')
|
||||
heading = heading.replace('\'', '')
|
||||
heading = heading.replace(';', '')
|
||||
new_raw_html = new_raw_html + '<div class="heading">' + heading
|
||||
if item.startswith('var heading2 ='):
|
||||
heading = item.replace('var heading2 = \'', '')
|
||||
heading = heading.replace('\'', '')
|
||||
heading = heading.replace(';', '')
|
||||
if heading <> '':
|
||||
new_raw_html = new_raw_html + '<br>' + heading + '</div>'
|
||||
else:
|
||||
new_raw_html = new_raw_html + '</div>'
|
||||
if item.startswith('var content ='):
|
||||
content = item.replace("var content = ", '')
|
||||
content = content.replace('\'', '')
|
||||
content = content.replace(';', '')
|
||||
new_raw_html = new_raw_html + '<div class="content">' + content + '</div>'
|
||||
if item.startswith('var photocontent ='):
|
||||
photo = item.replace('var photocontent = \'', '')
|
||||
photo = photo.replace('\'', '')
|
||||
photo = photo.replace(';', '')
|
||||
photo = photo.replace('<tr>', '')
|
||||
photo = photo.replace('<td>', '')
|
||||
photo = photo.replace('</tr>', '')
|
||||
photo = photo.replace('</td>', '<br>')
|
||||
photo = photo.replace('class="photo"', '')
|
||||
new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>'
|
||||
new_html = new_raw_html + '</body></html>'
|
||||
else:
|
||||
# .txt based file
|
||||
splitter = re.compile(r'\n') # Match non-digits
|
||||
new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
|
||||
next_is_img_txt = False
|
||||
title_started = False
|
||||
title_break_reached = False
|
||||
met_article_start_char = False
|
||||
for item in splitter.split(raw_html):
|
||||
item = item.strip()
|
||||
# if title already reached but break between title and content not yet found, record title_break_reached
|
||||
if title_started == True and title_break_reached == False and item == '':
|
||||
title_break_reached = True
|
||||
# if title reached and title_break_reached and met_article_start_char == False and item is not empty
|
||||
# start content
|
||||
elif title_started == True and title_break_reached == True and met_article_start_char == False:
|
||||
if item <> '':
|
||||
met_article_start_char = True
|
||||
new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
|
||||
#if item.startswith(u'\u3010'):
|
||||
# met_article_start_char = True
|
||||
# new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
|
||||
else:
|
||||
if next_is_img_txt == False:
|
||||
if item.startswith("=@"):
|
||||
print 'skip movie link'
|
||||
elif item.startswith("=?"):
|
||||
next_is_img_txt = True
|
||||
new_raw_html += '<img src="' + str(item)[2:].strip() + '.gif" /><p>\n'
|
||||
elif item.startswith('=='):
|
||||
next_is_img_txt = True
|
||||
if False:
|
||||
# TODO: check existence of .gif first
|
||||
newimg = '_' + item[2:].strip() + '.jpg'
|
||||
new_raw_html += '<img src="' + newimg + '" /><p>\n'
|
||||
else:
|
||||
new_raw_html += '<img src="' + str(item)[2:].strip() + '.jpg" /><p>\n'
|
||||
elif item.startswith('='):
|
||||
next_is_img_txt = True
|
||||
if False:
|
||||
# TODO: check existence of .gif first
|
||||
newimg = '_' + item[1:].strip() + '.jpg'
|
||||
new_raw_html += '<img src="' + newimg + '" /><p>\n'
|
||||
else:
|
||||
new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n'
|
||||
else:
|
||||
if next_is_img_txt == False and met_article_start_char == False:
|
||||
if item <> '':
|
||||
if title_started == False:
|
||||
#print 'Title started at ', item
|
||||
new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
|
||||
title_started = True
|
||||
else:
|
||||
new_raw_html = new_raw_html + item + '\n'
|
||||
else:
|
||||
new_raw_html = new_raw_html + item + '<p>\n'
|
||||
else:
|
||||
next_is_img_txt = False
|
||||
new_raw_html = new_raw_html + item + '\n'
|
||||
new_html = new_raw_html + '</div></body></html>'
|
||||
#raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
|
||||
if __HiResImg__ == True:
|
||||
# TODO: add a _ in front of an image url
|
||||
if url.rfind('news.mingpao.com') > -1:
|
||||
imglist = re.findall('src="?.*?jpg"', new_html)
|
||||
br = mechanize.Browser()
|
||||
br.set_handle_redirect(False)
|
||||
for img in imglist:
|
||||
gifimg = img.replace('jpg"', 'gif"')
|
||||
try:
|
||||
br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
|
||||
new_html = new_html.replace(img, gifimg)
|
||||
except:
|
||||
# find the location of the first _
|
||||
pos = img.find('_')
|
||||
if pos > -1:
|
||||
# if found, insert _ after the first _
|
||||
newimg = img[0:pos] + '_' + img[pos:]
|
||||
new_html = new_html.replace(img, newimg)
|
||||
else:
|
||||
# if not found, insert _ after "
|
||||
new_html = new_html.replace(img[1:], '"_' + img[1:])
|
||||
elif url.rfind('life.mingpao.com') > -1:
|
||||
imglist = re.findall('src=\'?.*?jpg\'', new_html)
|
||||
br = mechanize.Browser()
|
||||
br.set_handle_redirect(False)
|
||||
#print 'Img list: ', imglist, '\n'
|
||||
for img in imglist:
|
||||
#print 'Found img: ', img
|
||||
gifimg = img.replace('jpg\'', 'gif\'')
|
||||
try:
|
||||
gifurl = re.sub(r'dailynews.*txt', '', url)
|
||||
br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
|
||||
new_html = new_html.replace(img, gifimg)
|
||||
except:
|
||||
pos = img.rfind('/')
|
||||
newimg = img[0:pos+1] + '_' + img[pos+1:]
|
||||
new_html = new_html.replace(img, newimg)
|
||||
# repeat with src quoted by double quotes, for text parsed from src txt
|
||||
imglist = re.findall('src="?.*?jpg"', new_html)
|
||||
for img in imglist:
|
||||
#print 'Found img: ', img
|
||||
gifimg = img.replace('jpg"', 'gif"')
|
||||
try:
|
||||
#print 'url', url
|
||||
pos = url.rfind('/')
|
||||
gifurl = url[:pos+1]
|
||||
#print 'try it:', gifurl + gifimg[5:len(gifimg)-1]
|
||||
br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
|
||||
new_html = new_html.replace(img, gifimg)
|
||||
except:
|
||||
pos = img.find('"')
|
||||
newimg = img[0:pos+1] + '_' + img[pos+1:]
|
||||
#print 'Use hi-res img', newimg
|
||||
new_html = new_html.replace(img, newimg)
|
||||
return new_html
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
@ -447,77 +684,153 @@ class MPRecipe(BasicNewsRecipe):
|
||||
del item['absmiddle']
|
||||
return soup
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
|
||||
# thumbnails shouldn't be available if using hi-res images
|
||||
if __IncludeThumbnails__ and __HiResImg__ == False and first and hasattr(self, 'add_toc_thumbnail'):
|
||||
img = soup.find('img')
|
||||
if img is not None:
|
||||
self.add_toc_thumbnail(article, img['src'])
|
||||
|
||||
try:
|
||||
if __IncludeSummary__ and len(article.text_summary.strip()) == 0:
|
||||
# look for content
|
||||
articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
|
||||
if not articlebodies:
|
||||
articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
|
||||
if not articlebodies:
|
||||
articlebodies = soup.findAll('div',attrs={'class':'content'})
|
||||
if not articlebodies:
|
||||
articlebodies = soup.findAll('div', attrs={'id':'font'})
|
||||
if articlebodies:
|
||||
for articlebody in articlebodies:
|
||||
if articlebody:
|
||||
# the text may or may not be enclosed in <p></p> tag
|
||||
paras = articlebody.findAll('p')
|
||||
if not paras:
|
||||
paras = articlebody
|
||||
textFound = False
|
||||
for p in paras:
|
||||
if not textFound:
|
||||
summary_candidate = self.tag_to_string(p).strip()
|
||||
summary_candidate = summary_candidate.replace(u'\u3010\u660e\u5831\u5c08\u8a0a\u3011', '', 1)
|
||||
if len(summary_candidate) > 0:
|
||||
article.summary = article.text_summary = summary_candidate
|
||||
textFound = True
|
||||
else:
|
||||
# display a simple text
|
||||
#article.summary = article.text_summary = u'\u66f4\u591a......'
|
||||
# display word counts
|
||||
counts = 0
|
||||
articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
|
||||
if not articlebodies:
|
||||
articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
|
||||
if not articlebodies:
|
||||
articlebodies = soup.findAll('div',attrs={'class':'content'})
|
||||
if not articlebodies:
|
||||
articlebodies = soup.findAll('div', attrs={'id':'font'})
|
||||
if articlebodies:
|
||||
for articlebody in articlebodies:
|
||||
# the text may or may not be enclosed in <p></p> tag
|
||||
paras = articlebody.findAll('p')
|
||||
if not paras:
|
||||
paras = articlebody
|
||||
for p in paras:
|
||||
summary_candidate = self.tag_to_string(p).strip()
|
||||
counts += len(summary_candidate)
|
||||
article.summary = article.text_summary = u'\uff08' + str(counts) + u'\u5b57\uff09'
|
||||
except:
|
||||
self.log("Error creating article descriptions")
|
||||
return
|
||||
|
||||
# override from the one in version 0.8.31
|
||||
def create_opf(self, feeds, dir=None):
|
||||
if dir is None:
|
||||
dir = self.output_dir
|
||||
if __UseChineseTitle__ == True:
|
||||
if __Region__ == 'Hong Kong':
|
||||
title = u'\u660e\u5831 (\u9999\u6e2f)'
|
||||
elif __Region__ == 'Vancouver':
|
||||
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
|
||||
elif __Region__ == 'Toronto':
|
||||
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
|
||||
else:
|
||||
title = self.short_title()
|
||||
# if not generating a periodical, force date to apply in title
|
||||
if __MakePeriodical__ == False:
|
||||
title = self.short_title()
|
||||
# change 1: allow our own flag to tell if a periodical is to be generated
|
||||
# also use customed date instead of current time
|
||||
if __MakePeriodical__ == False or self.output_profile.periodical_date_in_title:
|
||||
title = title + ' ' + self.get_fetchformatteddate()
|
||||
if True:
|
||||
mi = MetaInformation(title, [self.publisher])
|
||||
mi.publisher = self.publisher
|
||||
mi.author_sort = self.publisher
|
||||
if __MakePeriodical__ == True:
|
||||
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
|
||||
else:
|
||||
mi.publication_type = self.publication_type+':'+self.short_title()
|
||||
#mi.timestamp = nowf()
|
||||
mi.timestamp = self.get_dtlocal()
|
||||
mi.comments = self.description
|
||||
if not isinstance(mi.comments, unicode):
|
||||
mi.comments = mi.comments.decode('utf-8', 'replace')
|
||||
#mi.pubdate = nowf()
|
||||
mi.pubdate = self.get_dtlocal()
|
||||
opf_path = os.path.join(dir, 'index.opf')
|
||||
ncx_path = os.path.join(dir, 'index.ncx')
|
||||
opf = OPFCreator(dir, mi)
|
||||
# Add mastheadImage entry to <guide> section
|
||||
mp = getattr(self, 'masthead_path', None)
|
||||
if mp is not None and os.access(mp, os.R_OK):
|
||||
from calibre.ebooks.metadata.opf2 import Guide
|
||||
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
|
||||
ref.type = 'masthead'
|
||||
ref.title = 'Masthead Image'
|
||||
opf.guide.append(ref)
|
||||
# end of change 1
|
||||
# change 2: __appname__ replaced by newspaper publisher
|
||||
__appname__ = self.publisher
|
||||
mi = MetaInformation(title, [__appname__])
|
||||
mi.publisher = __appname__
|
||||
mi.author_sort = __appname__
|
||||
# change 3: use __MakePeriodical__ flag to tell if a periodical should be generated
|
||||
if __MakePeriodical__ == True:
|
||||
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
|
||||
else:
|
||||
mi.publication_type = self.publication_type+':'+self.short_title()
|
||||
#mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
|
||||
# change 4: in the following, all the nowf() are changed to adjusted time
|
||||
# This one doesn't matter
|
||||
mi.timestamp = nowf()
|
||||
# change 5: skip listing the articles
|
||||
#article_titles, aseen = [], set()
|
||||
#for f in feeds:
|
||||
# for a in f:
|
||||
# if a.title and a.title not in aseen:
|
||||
# aseen.add(a.title)
|
||||
# article_titles.append(force_unicode(a.title, 'utf-8'))
|
||||
|
||||
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
|
||||
manifest.append(os.path.join(dir, 'index.html'))
|
||||
manifest.append(os.path.join(dir, 'index.ncx'))
|
||||
#mi.comments = self.description
|
||||
#if not isinstance(mi.comments, unicode):
|
||||
# mi.comments = mi.comments.decode('utf-8', 'replace')
|
||||
#mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
|
||||
# '\n\n'.join(article_titles))
|
||||
|
||||
# Get cover
|
||||
cpath = getattr(self, 'cover_path', None)
|
||||
if cpath is None:
|
||||
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
|
||||
if self.default_cover(pf):
|
||||
cpath = pf.name
|
||||
if cpath is not None and os.access(cpath, os.R_OK):
|
||||
opf.cover = cpath
|
||||
manifest.append(cpath)
|
||||
language = canonicalize_lang(self.language)
|
||||
if language is not None:
|
||||
mi.language = language
|
||||
# This one affects the pub date shown in kindle title
|
||||
#mi.pubdate = nowf()
|
||||
# now appears to need the time field to be > 12.00noon as well
|
||||
mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
|
||||
opf_path = os.path.join(dir, 'index.opf')
|
||||
ncx_path = os.path.join(dir, 'index.ncx')
|
||||
|
||||
# Get masthead
|
||||
mpath = getattr(self, 'masthead_path', None)
|
||||
if mpath is not None and os.access(mpath, os.R_OK):
|
||||
manifest.append(mpath)
|
||||
opf = OPFCreator(dir, mi)
|
||||
# Add mastheadImage entry to <guide> section
|
||||
mp = getattr(self, 'masthead_path', None)
|
||||
if mp is not None and os.access(mp, os.R_OK):
|
||||
from calibre.ebooks.metadata.opf2 import Guide
|
||||
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
|
||||
ref.type = 'masthead'
|
||||
ref.title = 'Masthead Image'
|
||||
opf.guide.append(ref)
|
||||
|
||||
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
|
||||
manifest.append(os.path.join(dir, 'index.html'))
|
||||
manifest.append(os.path.join(dir, 'index.ncx'))
|
||||
|
||||
# Get cover
|
||||
cpath = getattr(self, 'cover_path', None)
|
||||
if cpath is None:
|
||||
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
|
||||
if self.default_cover(pf):
|
||||
cpath = pf.name
|
||||
if cpath is not None and os.access(cpath, os.R_OK):
|
||||
opf.cover = cpath
|
||||
manifest.append(cpath)
|
||||
|
||||
# Get masthead
|
||||
mpath = getattr(self, 'masthead_path', None)
|
||||
if mpath is not None and os.access(mpath, os.R_OK):
|
||||
manifest.append(mpath)
|
||||
|
||||
opf.create_manifest_from_files_in(manifest)
|
||||
for mani in opf.manifest:
|
||||
if mani.path.endswith('.ncx'):
|
||||
mani.id = 'ncx'
|
||||
if mani.path.endswith('mastheadImage.jpg'):
|
||||
mani.id = 'masthead-image'
|
||||
|
||||
entries = ['index.html']
|
||||
toc = TOC(base_path=dir)
|
||||
self.play_order_counter = 0
|
||||
self.play_order_map = {}
|
||||
|
||||
opf.create_manifest_from_files_in(manifest)
|
||||
for mani in opf.manifest:
|
||||
if mani.path.endswith('.ncx'):
|
||||
mani.id = 'ncx'
|
||||
if mani.path.endswith('mastheadImage.jpg'):
|
||||
mani.id = 'masthead-image'
|
||||
entries = ['index.html']
|
||||
toc = TOC(base_path=dir)
|
||||
self.play_order_counter = 0
|
||||
self.play_order_map = {}
|
||||
|
||||
def feed_index(num, parent):
|
||||
f = feeds[num]
|
||||
@ -532,13 +845,16 @@ class MPRecipe(BasicNewsRecipe):
|
||||
desc = None
|
||||
else:
|
||||
desc = self.description_limiter(desc)
|
||||
tt = a.toc_thumbnail if a.toc_thumbnail else None
|
||||
entries.append('%sindex.html'%adir)
|
||||
po = self.play_order_map.get(entries[-1], None)
|
||||
if po is None:
|
||||
self.play_order_counter += 1
|
||||
po = self.play_order_counter
|
||||
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
|
||||
play_order=po, author=auth, description=desc)
|
||||
parent.add_item('%sindex.html'%adir, None,
|
||||
a.title if a.title else _('Untitled Article'),
|
||||
play_order=po, author=auth,
|
||||
description=desc, toc_thumbnail=tt)
|
||||
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
|
||||
for sp in a.sub_pages:
|
||||
prefix = os.path.commonprefix([opf_path, sp])
|
||||
@ -555,7 +871,7 @@ class MPRecipe(BasicNewsRecipe):
|
||||
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
|
||||
templ = self.navbar.generate(True, num, j, len(f),
|
||||
not self.has_single_feed,
|
||||
a.orig_url, self.publisher, prefix=prefix,
|
||||
a.orig_url, __appname__, prefix=prefix,
|
||||
center=self.center_navbar)
|
||||
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
|
||||
body.insert(len(body.contents), elem)
|
||||
@ -578,7 +894,7 @@ class MPRecipe(BasicNewsRecipe):
|
||||
if not desc:
|
||||
desc = None
|
||||
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
|
||||
f.title, play_order=po, description=desc, author=auth))
|
||||
f.title, play_order=po, description=desc, author=auth))
|
||||
|
||||
else:
|
||||
entries.append('feed_%d/index.html'%0)
|
||||
@ -592,3 +908,4 @@ class MPRecipe(BasicNewsRecipe):
|
||||
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
|
||||
opf.render(opf_file, ncx_file)
|
||||
|
||||
|
||||
|
@ -4,18 +4,41 @@ __copyright__ = '2010-2011, Eddie Lau'
|
||||
# Region - Hong Kong, Vancouver, Toronto
|
||||
__Region__ = 'Vancouver'
|
||||
# Users of Kindle 3 with limited system-level CJK support
|
||||
# please replace the following "True" with "False".
|
||||
# please replace the following "True" with "False". (Default: True)
|
||||
__MakePeriodical__ = True
|
||||
# Turn below to true if your device supports display of CJK titles
|
||||
# Turn below to True if your device supports display of CJK titles (Default: False)
|
||||
__UseChineseTitle__ = False
|
||||
# Set it to False if you want to skip images
|
||||
# Set it to False if you want to skip images (Default: True)
|
||||
__KeepImages__ = True
|
||||
# (HK only) Turn below to true if you wish to use life.mingpao.com as the main article source
|
||||
# Set it to True if you want to include a summary in Kindle's article view (Default: False)
|
||||
__IncludeSummary__ = False
|
||||
# Set it to True if you want thumbnail images in Kindle's article view (Default: True)
|
||||
__IncludeThumbnails__ = True
|
||||
# (HK only) Turn below to True if you wish to use life.mingpao.com as the main article source (Default: True)
|
||||
__UseLife__ = True
|
||||
# (HK only) It is to disable premium content (Default: False)
|
||||
__InclPremium__ = False
|
||||
# (HK only) Turn below to True if you wish to parse articles in news.mingpao.com with their printer-friendly formats (Default: True)
|
||||
__ParsePFF__ = True
|
||||
# (HK only) Turn below to True if you wish hi-res images (Default: False)
|
||||
__HiResImg__ = False
|
||||
# Override the date returned by the program if specifying a YYYYMMDD below
|
||||
__Date__ = ''
|
||||
|
||||
|
||||
'''
|
||||
Change Log:
|
||||
2011/12/18: update the overridden create_odf(.) routine with the one from Calibre version 0.8.31. Move __UseChineseTitle__ usage away
|
||||
from create_odf(.). Optional support of text_summary and thumbnail images in Kindle's article view. Start new day
|
||||
download of Hong Kong Mingpao at 4.30am. Set the actual publication date shown on kindle device.
|
||||
2011/12/01: take care of situation that in txt source parsing, the article content does start with special character u'\u3010'
|
||||
2011/10/21: fix a bug that hi-res img is unavailable in pages parsed from source txt
|
||||
2011/10/19: fix a bug in txt source parsing
|
||||
2011/10/17: disable fetching of premium content, also improved txt source parsing
|
||||
2011/10/04: option to get hi-res photos for the articles
|
||||
2011/09/21: fetching "column" section is made optional.
|
||||
2011/09/18: parse "column" section stuff from source text file directly.
|
||||
2011/09/07: disable "column" section as it is no longer offered free.
|
||||
2011/06/26: add fetching Vancouver and Toronto versions of the paper, also provide captions for images using life.mingpao fetch source
|
||||
provide options to remove all images in the file
|
||||
2011/05/12: switch the main parse source to life.mingpao.com, which has more photos on the article pages
|
||||
@ -37,30 +60,38 @@ Change Log:
|
||||
2010/10/31: skip repeated articles in section pages
|
||||
'''
|
||||
|
||||
import os, datetime, re
|
||||
from calibre.utils.date import now as nowf
|
||||
import os, datetime, re, mechanize
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from contextlib import nested
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.ebooks.metadata.toc import TOC
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre.utils.localization import canonicalize_lang
|
||||
|
||||
# MAIN CLASS
|
||||
class MPRecipe(BasicNewsRecipe):
|
||||
if __Region__ == 'Hong Kong':
|
||||
title = 'Ming Pao - Hong Kong'
|
||||
if __UseChineseTitle__ == True:
|
||||
title = u'\u660e\u5831 (\u9999\u6e2f)'
|
||||
else:
|
||||
title = 'Ming Pao - Hong Kong'
|
||||
description = 'Hong Kong Chinese Newspaper (http://news.mingpao.com)'
|
||||
category = 'Chinese, News, Hong Kong'
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} font>b {font-size:200%; font-weight:bold;}'
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} font>b {font-size:200%; font-weight:bold;} div[class=heading] {font-size:200%; font-weight:bold;} div[class=images] {font-size:50%;}'
|
||||
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
|
||||
keep_only_tags = [dict(name='h1'),
|
||||
dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page title
|
||||
dict(name='font', attrs={'color':['AA0000']}), # for column articles title
|
||||
dict(attrs={'class':['heading']}), # for heading from txt
|
||||
dict(attrs={'id':['newscontent']}), # entertainment and column page content
|
||||
dict(attrs={'id':['newscontent01','newscontent02']}),
|
||||
dict(attrs={'class':['content']}), # for content from txt
|
||||
dict(attrs={'class':['photo']}),
|
||||
dict(name='table', attrs={'width':['100%'], 'border':['0'], 'cellspacing':['5'], 'cellpadding':['0']}), # content in printed version of life.mingpao.com
|
||||
dict(name='img', attrs={'width':['180'], 'alt':['按圖放大']}) # images for source from life.mingpao.com
|
||||
dict(name='img', attrs={'width':['180'], 'alt':['????']}), # images for source from life.mingpao.com
|
||||
dict(attrs={'class':['images']}) # for images from txt
|
||||
]
|
||||
if __KeepImages__:
|
||||
remove_tags = [dict(name='style'),
|
||||
@ -90,7 +121,10 @@ class MPRecipe(BasicNewsRecipe):
|
||||
lambda match: "</b>")
|
||||
]
|
||||
elif __Region__ == 'Vancouver':
|
||||
title = 'Ming Pao - Vancouver'
|
||||
if __UseChineseTitle__ == True:
|
||||
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
|
||||
else:
|
||||
title = 'Ming Pao - Vancouver'
|
||||
description = 'Vancouver Chinese Newspaper (http://www.mingpaovan.com)'
|
||||
category = 'Chinese, News, Vancouver'
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
|
||||
@ -108,7 +142,10 @@ class MPRecipe(BasicNewsRecipe):
|
||||
lambda match: ''),
|
||||
]
|
||||
elif __Region__ == 'Toronto':
|
||||
title = 'Ming Pao - Toronto'
|
||||
if __UseChineseTitle__ == True:
|
||||
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
|
||||
else:
|
||||
title = 'Ming Pao - Toronto'
|
||||
description = 'Toronto Chinese Newspaper (http://www.mingpaotor.com)'
|
||||
category = 'Chinese, News, Toronto'
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;} b>font {font-size:200%; font-weight:bold;}'
|
||||
@ -139,49 +176,12 @@ class MPRecipe(BasicNewsRecipe):
|
||||
conversion_options = {'linearize_tables':True}
|
||||
timefmt = ''
|
||||
|
||||
def image_url_processor(cls, baseurl, url):
|
||||
# trick: break the url at the first occurance of digit, add an additional
|
||||
# '_' at the front
|
||||
# not working, may need to move this to preprocess_html() method
|
||||
# minIdx = 10000
|
||||
# i0 = url.find('0')
|
||||
# if i0 >= 0 and i0 < minIdx:
|
||||
# minIdx = i0
|
||||
# i1 = url.find('1')
|
||||
# if i1 >= 0 and i1 < minIdx:
|
||||
# minIdx = i1
|
||||
# i2 = url.find('2')
|
||||
# if i2 >= 0 and i2 < minIdx:
|
||||
# minIdx = i2
|
||||
# i3 = url.find('3')
|
||||
# if i3 >= 0 and i0 < minIdx:
|
||||
# minIdx = i3
|
||||
# i4 = url.find('4')
|
||||
# if i4 >= 0 and i4 < minIdx:
|
||||
# minIdx = i4
|
||||
# i5 = url.find('5')
|
||||
# if i5 >= 0 and i5 < minIdx:
|
||||
# minIdx = i5
|
||||
# i6 = url.find('6')
|
||||
# if i6 >= 0 and i6 < minIdx:
|
||||
# minIdx = i6
|
||||
# i7 = url.find('7')
|
||||
# if i7 >= 0 and i7 < minIdx:
|
||||
# minIdx = i7
|
||||
# i8 = url.find('8')
|
||||
# if i8 >= 0 and i8 < minIdx:
|
||||
# minIdx = i8
|
||||
# i9 = url.find('9')
|
||||
# if i9 >= 0 and i9 < minIdx:
|
||||
# minIdx = i9
|
||||
return url
|
||||
|
||||
def get_dtlocal(self):
|
||||
dt_utc = datetime.datetime.utcnow()
|
||||
if __Region__ == 'Hong Kong':
|
||||
# convert UTC to local hk time - at HKT 5.30am, all news are available
|
||||
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(5.5/24)
|
||||
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(5.5/24)
|
||||
# convert UTC to local hk time - at HKT 4.30am, all news are available
|
||||
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(4.5/24)
|
||||
# dt_local = dt_utc.astimezone(pytz.timezone('Asia/Hong_Kong')) - datetime.timedelta(4.5/24)
|
||||
elif __Region__ == 'Vancouver':
|
||||
# convert UTC to local Vancouver time - at PST time 5.30am, all news are available
|
||||
dt_local = dt_utc + datetime.timedelta(-8.0/24) - datetime.timedelta(5.5/24)
|
||||
@ -193,13 +193,34 @@ class MPRecipe(BasicNewsRecipe):
|
||||
return dt_local
|
||||
|
||||
def get_fetchdate(self):
|
||||
return self.get_dtlocal().strftime("%Y%m%d")
|
||||
if __Date__ <> '':
|
||||
return __Date__
|
||||
else:
|
||||
return self.get_dtlocal().strftime("%Y%m%d")
|
||||
|
||||
def get_fetchformatteddate(self):
|
||||
return self.get_dtlocal().strftime("%Y-%m-%d")
|
||||
if __Date__ <> '':
|
||||
return __Date__[0:4]+'-'+__Date__[4:6]+'-'+__Date__[6:8]
|
||||
else:
|
||||
return self.get_dtlocal().strftime("%Y-%m-%d")
|
||||
|
||||
def get_fetchyear(self):
|
||||
if __Date__ <> '':
|
||||
return __Date__[0:4]
|
||||
else:
|
||||
return self.get_dtlocal().strftime("%Y")
|
||||
|
||||
def get_fetchmonth(self):
|
||||
if __Date__ <> '':
|
||||
return __Date__[4:6]
|
||||
else:
|
||||
return self.get_dtlocal().strftime("%m")
|
||||
|
||||
def get_fetchday(self):
|
||||
return self.get_dtlocal().strftime("%d")
|
||||
if __Date__ <> '':
|
||||
return __Date__[6:8]
|
||||
else:
|
||||
return self.get_dtlocal().strftime("%d")
|
||||
|
||||
def get_cover_url(self):
|
||||
if __Region__ == 'Hong Kong':
|
||||
@ -230,12 +251,23 @@ class MPRecipe(BasicNewsRecipe):
|
||||
(u'\u570b\u969b World', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalta', 'nal'),
|
||||
(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal'),
|
||||
(u'\u9ad4\u80b2 Sport', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalsp', 'nal'),
|
||||
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal'),
|
||||
(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')]:
|
||||
articles = self.parse_section2(url, keystr)
|
||||
(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
|
||||
]:
|
||||
if __InclPremium__ == True:
|
||||
articles = self.parse_section2_txt(url, keystr)
|
||||
else:
|
||||
articles = self.parse_section2(url, keystr)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
if __InclPremium__ == True:
|
||||
# parse column section articles directly from .txt files
|
||||
for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
|
||||
]:
|
||||
articles = self.parse_section2_txt(url, keystr)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
|
||||
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
@ -244,15 +276,16 @@ class MPRecipe(BasicNewsRecipe):
|
||||
else:
|
||||
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
|
||||
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
|
||||
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm')]:
|
||||
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
|
||||
(u'\u793e\u8a55/\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
# special- editorial
|
||||
ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
|
||||
if ed_articles:
|
||||
feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
|
||||
#ed_articles = self.parse_ed_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=nalmr')
|
||||
#if ed_articles:
|
||||
# feeds.append((u'\u793e\u8a55/\u7b46\u9663 Editorial', ed_articles))
|
||||
|
||||
for title, url in [(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
|
||||
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
|
||||
@ -263,20 +296,39 @@ class MPRecipe(BasicNewsRecipe):
|
||||
|
||||
# special - finance
|
||||
#fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
|
||||
fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
|
||||
if fin_articles:
|
||||
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
|
||||
#fin_articles = self.parse_fin_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea')
|
||||
#if fin_articles:
|
||||
# feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
|
||||
|
||||
for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
|
||||
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
for title, url, keystr in [(u'\u7d93\u6fdf Finance', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalea', 'nal')]:
|
||||
articles = self.parse_section2_txt(url, keystr)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
#for title, url in [('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
|
||||
# (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm')]:
|
||||
# articles = self.parse_section(url)
|
||||
# if articles:
|
||||
# feeds.append((title, articles))
|
||||
|
||||
# special - entertainment
|
||||
ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
|
||||
if ent_articles:
|
||||
feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
|
||||
#ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
|
||||
#if ent_articles:
|
||||
# feeds.append((u'\u5f71\u8996 Film/TV', ent_articles))
|
||||
|
||||
for title, url, keystr in [(u'\u5f71\u8996 Film/TV', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr + '&Category=nalma', 'nal')
|
||||
]:
|
||||
articles = self.parse_section2_txt(url, keystr)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
if __InclPremium__ == True:
|
||||
# parse column section articles directly from .txt files
|
||||
for title, url, keystr in [(u'\u5c08\u6b04 Columns', 'http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn', 'ncl')
|
||||
]:
|
||||
articles = self.parse_section2_txt(url, keystr)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
for title, url in [(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
|
||||
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
|
||||
@ -284,11 +336,6 @@ class MPRecipe(BasicNewsRecipe):
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
|
||||
|
||||
# special- columns
|
||||
col_articles = self.parse_col_section('http://life.mingpao.com/cfm/dailynews2.cfm?Issue=' + dateStr +'&Category=ncolumn')
|
||||
if col_articles:
|
||||
feeds.append((u'\u5c08\u6b04 Columns', col_articles))
|
||||
elif __Region__ == 'Vancouver':
|
||||
for title, url in [(u'\u8981\u805e Headline', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VAindex.htm'),
|
||||
(u'\u52a0\u570b Canada', 'http://www.mingpaovan.com/htm/News/' + dateStr + '/VBindex.htm'),
|
||||
@ -332,6 +379,16 @@ class MPRecipe(BasicNewsRecipe):
|
||||
title = self.tag_to_string(a)
|
||||
url = a.get('href', False)
|
||||
url = 'http://news.mingpao.com/' + dateStr + '/' +url
|
||||
# replace the url to the print-friendly version
|
||||
if __ParsePFF__ == True:
|
||||
if url.rfind('Redirect') <> -1 and __InclPremium__ == True:
|
||||
url = re.sub(dateStr + '.*' + dateStr, dateStr, url)
|
||||
url = re.sub('%2F.*%2F', '/', url)
|
||||
title = title.replace(u'\u6536\u8cbb\u5167\u5bb9', '')
|
||||
url = url.replace('%2Etxt', '_print.htm')
|
||||
url = url.replace('%5F', '_')
|
||||
else:
|
||||
url = url.replace('.htm', '_print.htm')
|
||||
if url not in included_urls and url.rfind('Redirect') == -1:
|
||||
current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
|
||||
included_urls.append(url)
|
||||
@ -340,6 +397,8 @@ class MPRecipe(BasicNewsRecipe):
|
||||
|
||||
# parse from life.mingpao.com
|
||||
def parse_section2(self, url, keystr):
|
||||
br = mechanize.Browser()
|
||||
br.set_handle_redirect(False)
|
||||
self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
a = soup.findAll('a', href=True)
|
||||
@ -350,7 +409,29 @@ class MPRecipe(BasicNewsRecipe):
|
||||
title = self.tag_to_string(i)
|
||||
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
|
||||
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
|
||||
url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
|
||||
try:
|
||||
br.open_novisit(url)
|
||||
url = url.replace('dailynews3.cfm', 'dailynews3a.cfm') # use printed version of the article
|
||||
current_articles.append({'title': title, 'url': url, 'description': ''})
|
||||
included_urls.append(url)
|
||||
except:
|
||||
print 'skipping a premium article'
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
|
||||
# parse from text file of life.mingpao.com
|
||||
def parse_section2_txt(self, url, keystr):
|
||||
self.get_fetchdate()
|
||||
soup = self.index_to_soup(url)
|
||||
a = soup.findAll('a', href=True)
|
||||
a.reverse()
|
||||
current_articles = []
|
||||
included_urls = []
|
||||
for i in a:
|
||||
title = self.tag_to_string(i)
|
||||
url = 'http://life.mingpao.com/cfm/' + i.get('href', False)
|
||||
if (url not in included_urls) and (not url.rfind('.txt') == -1) and (not url.rfind(keystr) == -1):
|
||||
url = url.replace('cfm/dailynews3.cfm?File=', 'ftp/Life3/') # use printed version of the article
|
||||
current_articles.append({'title': title, 'url': url, 'description': ''})
|
||||
included_urls.append(url)
|
||||
current_articles.reverse()
|
||||
@ -438,6 +519,162 @@ class MPRecipe(BasicNewsRecipe):
|
||||
current_articles.reverse()
|
||||
return current_articles
|
||||
|
||||
# preprocess those .txt and javascript based files
|
||||
def preprocess_raw_html(self, raw_html, url):
|
||||
new_html = raw_html
|
||||
if url.rfind('ftp') <> -1 or url.rfind('_print.htm') <> -1:
|
||||
if url.rfind('_print.htm') <> -1:
|
||||
# javascript based file
|
||||
splitter = re.compile(r'\n')
|
||||
new_raw_html = '<html><head><title>Untitled</title></head>'
|
||||
new_raw_html = new_raw_html + '<body>'
|
||||
for item in splitter.split(raw_html):
|
||||
if item.startswith('var heading1 ='):
|
||||
heading = item.replace('var heading1 = \'', '')
|
||||
heading = heading.replace('\'', '')
|
||||
heading = heading.replace(';', '')
|
||||
new_raw_html = new_raw_html + '<div class="heading">' + heading
|
||||
if item.startswith('var heading2 ='):
|
||||
heading = item.replace('var heading2 = \'', '')
|
||||
heading = heading.replace('\'', '')
|
||||
heading = heading.replace(';', '')
|
||||
if heading <> '':
|
||||
new_raw_html = new_raw_html + '<br>' + heading + '</div>'
|
||||
else:
|
||||
new_raw_html = new_raw_html + '</div>'
|
||||
if item.startswith('var content ='):
|
||||
content = item.replace("var content = ", '')
|
||||
content = content.replace('\'', '')
|
||||
content = content.replace(';', '')
|
||||
new_raw_html = new_raw_html + '<div class="content">' + content + '</div>'
|
||||
if item.startswith('var photocontent ='):
|
||||
photo = item.replace('var photocontent = \'', '')
|
||||
photo = photo.replace('\'', '')
|
||||
photo = photo.replace(';', '')
|
||||
photo = photo.replace('<tr>', '')
|
||||
photo = photo.replace('<td>', '')
|
||||
photo = photo.replace('</tr>', '')
|
||||
photo = photo.replace('</td>', '<br>')
|
||||
photo = photo.replace('class="photo"', '')
|
||||
new_raw_html = new_raw_html + '<div class="images">' + photo + '</div>'
|
||||
new_html = new_raw_html + '</body></html>'
|
||||
else:
|
||||
# .txt based file
|
||||
splitter = re.compile(r'\n') # Match non-digits
|
||||
new_raw_html = '<html><head><title>Untitled</title></head><body><div class="images">'
|
||||
next_is_img_txt = False
|
||||
title_started = False
|
||||
title_break_reached = False
|
||||
met_article_start_char = False
|
||||
for item in splitter.split(raw_html):
|
||||
item = item.strip()
|
||||
# if title already reached but break between title and content not yet found, record title_break_reached
|
||||
if title_started == True and title_break_reached == False and item == '':
|
||||
title_break_reached = True
|
||||
# if title reached and title_break_reached and met_article_start_char == False and item is not empty
|
||||
# start content
|
||||
elif title_started == True and title_break_reached == True and met_article_start_char == False:
|
||||
if item <> '':
|
||||
met_article_start_char = True
|
||||
new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
|
||||
#if item.startswith(u'\u3010'):
|
||||
# met_article_start_char = True
|
||||
# new_raw_html = new_raw_html + '</div><div class="content"><p>' + item + '<p>\n'
|
||||
else:
|
||||
if next_is_img_txt == False:
|
||||
if item.startswith("=@"):
|
||||
print 'skip movie link'
|
||||
elif item.startswith("=?"):
|
||||
next_is_img_txt = True
|
||||
new_raw_html += '<img src="' + str(item)[2:].strip() + '.gif" /><p>\n'
|
||||
elif item.startswith('=='):
|
||||
next_is_img_txt = True
|
||||
if False:
|
||||
# TODO: check existence of .gif first
|
||||
newimg = '_' + item[2:].strip() + '.jpg'
|
||||
new_raw_html += '<img src="' + newimg + '" /><p>\n'
|
||||
else:
|
||||
new_raw_html += '<img src="' + str(item)[2:].strip() + '.jpg" /><p>\n'
|
||||
elif item.startswith('='):
|
||||
next_is_img_txt = True
|
||||
if False:
|
||||
# TODO: check existence of .gif first
|
||||
newimg = '_' + item[1:].strip() + '.jpg'
|
||||
new_raw_html += '<img src="' + newimg + '" /><p>\n'
|
||||
else:
|
||||
new_raw_html += '<img src="' + str(item)[1:].strip() + '.jpg" /><p>\n'
|
||||
else:
|
||||
if next_is_img_txt == False and met_article_start_char == False:
|
||||
if item <> '':
|
||||
if title_started == False:
|
||||
#print 'Title started at ', item
|
||||
new_raw_html = new_raw_html + '</div><div class="heading">' + item + '\n'
|
||||
title_started = True
|
||||
else:
|
||||
new_raw_html = new_raw_html + item + '\n'
|
||||
else:
|
||||
new_raw_html = new_raw_html + item + '<p>\n'
|
||||
else:
|
||||
next_is_img_txt = False
|
||||
new_raw_html = new_raw_html + item + '\n'
|
||||
new_html = new_raw_html + '</div></body></html>'
|
||||
#raw_html = raw_html.replace(u'<p>\u3010', u'\u3010')
|
||||
if __HiResImg__ == True:
|
||||
# TODO: add a _ in front of an image url
|
||||
if url.rfind('news.mingpao.com') > -1:
|
||||
imglist = re.findall('src="?.*?jpg"', new_html)
|
||||
br = mechanize.Browser()
|
||||
br.set_handle_redirect(False)
|
||||
for img in imglist:
|
||||
gifimg = img.replace('jpg"', 'gif"')
|
||||
try:
|
||||
br.open_novisit(url + "/../" + gifimg[5:len(gifimg)-1])
|
||||
new_html = new_html.replace(img, gifimg)
|
||||
except:
|
||||
# find the location of the first _
|
||||
pos = img.find('_')
|
||||
if pos > -1:
|
||||
# if found, insert _ after the first _
|
||||
newimg = img[0:pos] + '_' + img[pos:]
|
||||
new_html = new_html.replace(img, newimg)
|
||||
else:
|
||||
# if not found, insert _ after "
|
||||
new_html = new_html.replace(img[1:], '"_' + img[1:])
|
||||
elif url.rfind('life.mingpao.com') > -1:
|
||||
imglist = re.findall('src=\'?.*?jpg\'', new_html)
|
||||
br = mechanize.Browser()
|
||||
br.set_handle_redirect(False)
|
||||
#print 'Img list: ', imglist, '\n'
|
||||
for img in imglist:
|
||||
#print 'Found img: ', img
|
||||
gifimg = img.replace('jpg\'', 'gif\'')
|
||||
try:
|
||||
gifurl = re.sub(r'dailynews.*txt', '', url)
|
||||
br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
|
||||
new_html = new_html.replace(img, gifimg)
|
||||
except:
|
||||
pos = img.rfind('/')
|
||||
newimg = img[0:pos+1] + '_' + img[pos+1:]
|
||||
new_html = new_html.replace(img, newimg)
|
||||
# repeat with src quoted by double quotes, for text parsed from src txt
|
||||
imglist = re.findall('src="?.*?jpg"', new_html)
|
||||
for img in imglist:
|
||||
#print 'Found img: ', img
|
||||
gifimg = img.replace('jpg"', 'gif"')
|
||||
try:
|
||||
#print 'url', url
|
||||
pos = url.rfind('/')
|
||||
gifurl = url[:pos+1]
|
||||
#print 'try it:', gifurl + gifimg[5:len(gifimg)-1]
|
||||
br.open_novisit(gifurl + gifimg[5:len(gifimg)-1])
|
||||
new_html = new_html.replace(img, gifimg)
|
||||
except:
|
||||
pos = img.find('"')
|
||||
newimg = img[0:pos+1] + '_' + img[pos+1:]
|
||||
#print 'Use hi-res img', newimg
|
||||
new_html = new_html.replace(img, newimg)
|
||||
return new_html
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
@ -447,77 +684,153 @@ class MPRecipe(BasicNewsRecipe):
|
||||
del item['absmiddle']
|
||||
return soup
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
|
||||
# thumbnails shouldn't be available if using hi-res images
|
||||
if __IncludeThumbnails__ and __HiResImg__ == False and first and hasattr(self, 'add_toc_thumbnail'):
|
||||
img = soup.find('img')
|
||||
if img is not None:
|
||||
self.add_toc_thumbnail(article, img['src'])
|
||||
|
||||
try:
|
||||
if __IncludeSummary__ and len(article.text_summary.strip()) == 0:
|
||||
# look for content
|
||||
articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
|
||||
if not articlebodies:
|
||||
articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
|
||||
if not articlebodies:
|
||||
articlebodies = soup.findAll('div',attrs={'class':'content'})
|
||||
if not articlebodies:
|
||||
articlebodies = soup.findAll('div', attrs={'id':'font'})
|
||||
if articlebodies:
|
||||
for articlebody in articlebodies:
|
||||
if articlebody:
|
||||
# the text may or may not be enclosed in <p></p> tag
|
||||
paras = articlebody.findAll('p')
|
||||
if not paras:
|
||||
paras = articlebody
|
||||
textFound = False
|
||||
for p in paras:
|
||||
if not textFound:
|
||||
summary_candidate = self.tag_to_string(p).strip()
|
||||
summary_candidate = summary_candidate.replace(u'\u3010\u660e\u5831\u5c08\u8a0a\u3011', '', 1)
|
||||
if len(summary_candidate) > 0:
|
||||
article.summary = article.text_summary = summary_candidate
|
||||
textFound = True
|
||||
else:
|
||||
# display a simple text
|
||||
#article.summary = article.text_summary = u'\u66f4\u591a......'
|
||||
# display word counts
|
||||
counts = 0
|
||||
articlebodies = soup.findAll('div',attrs={'id':'newscontent'})
|
||||
if not articlebodies:
|
||||
articlebodies = soup.findAll('div',attrs={'id':'newscontent01'})
|
||||
if not articlebodies:
|
||||
articlebodies = soup.findAll('div',attrs={'class':'content'})
|
||||
if not articlebodies:
|
||||
articlebodies = soup.findAll('div', attrs={'id':'font'})
|
||||
if articlebodies:
|
||||
for articlebody in articlebodies:
|
||||
# the text may or may not be enclosed in <p></p> tag
|
||||
paras = articlebody.findAll('p')
|
||||
if not paras:
|
||||
paras = articlebody
|
||||
for p in paras:
|
||||
summary_candidate = self.tag_to_string(p).strip()
|
||||
counts += len(summary_candidate)
|
||||
article.summary = article.text_summary = u'\uff08' + str(counts) + u'\u5b57\uff09'
|
||||
except:
|
||||
self.log("Error creating article descriptions")
|
||||
return
|
||||
|
||||
# override from the one in version 0.8.31
|
||||
def create_opf(self, feeds, dir=None):
|
||||
if dir is None:
|
||||
dir = self.output_dir
|
||||
if __UseChineseTitle__ == True:
|
||||
if __Region__ == 'Hong Kong':
|
||||
title = u'\u660e\u5831 (\u9999\u6e2f)'
|
||||
elif __Region__ == 'Vancouver':
|
||||
title = u'\u660e\u5831 (\u6eab\u54e5\u83ef)'
|
||||
elif __Region__ == 'Toronto':
|
||||
title = u'\u660e\u5831 (\u591a\u502b\u591a)'
|
||||
else:
|
||||
title = self.short_title()
|
||||
# if not generating a periodical, force date to apply in title
|
||||
if __MakePeriodical__ == False:
|
||||
title = self.short_title()
|
||||
# change 1: allow our own flag to tell if a periodical is to be generated
|
||||
# also use customed date instead of current time
|
||||
if __MakePeriodical__ == False or self.output_profile.periodical_date_in_title:
|
||||
title = title + ' ' + self.get_fetchformatteddate()
|
||||
if True:
|
||||
mi = MetaInformation(title, [self.publisher])
|
||||
mi.publisher = self.publisher
|
||||
mi.author_sort = self.publisher
|
||||
if __MakePeriodical__ == True:
|
||||
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
|
||||
else:
|
||||
mi.publication_type = self.publication_type+':'+self.short_title()
|
||||
#mi.timestamp = nowf()
|
||||
mi.timestamp = self.get_dtlocal()
|
||||
mi.comments = self.description
|
||||
if not isinstance(mi.comments, unicode):
|
||||
mi.comments = mi.comments.decode('utf-8', 'replace')
|
||||
#mi.pubdate = nowf()
|
||||
mi.pubdate = self.get_dtlocal()
|
||||
opf_path = os.path.join(dir, 'index.opf')
|
||||
ncx_path = os.path.join(dir, 'index.ncx')
|
||||
opf = OPFCreator(dir, mi)
|
||||
# Add mastheadImage entry to <guide> section
|
||||
mp = getattr(self, 'masthead_path', None)
|
||||
if mp is not None and os.access(mp, os.R_OK):
|
||||
from calibre.ebooks.metadata.opf2 import Guide
|
||||
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
|
||||
ref.type = 'masthead'
|
||||
ref.title = 'Masthead Image'
|
||||
opf.guide.append(ref)
|
||||
# end of change 1
|
||||
# change 2: __appname__ replaced by newspaper publisher
|
||||
__appname__ = self.publisher
|
||||
mi = MetaInformation(title, [__appname__])
|
||||
mi.publisher = __appname__
|
||||
mi.author_sort = __appname__
|
||||
# change 3: use __MakePeriodical__ flag to tell if a periodical should be generated
|
||||
if __MakePeriodical__ == True:
|
||||
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
|
||||
else:
|
||||
mi.publication_type = self.publication_type+':'+self.short_title()
|
||||
#mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
|
||||
# change 4: in the following, all the nowf() are changed to adjusted time
|
||||
# This one doesn't matter
|
||||
mi.timestamp = nowf()
|
||||
# change 5: skip listing the articles
|
||||
#article_titles, aseen = [], set()
|
||||
#for f in feeds:
|
||||
# for a in f:
|
||||
# if a.title and a.title not in aseen:
|
||||
# aseen.add(a.title)
|
||||
# article_titles.append(force_unicode(a.title, 'utf-8'))
|
||||
|
||||
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
|
||||
manifest.append(os.path.join(dir, 'index.html'))
|
||||
manifest.append(os.path.join(dir, 'index.ncx'))
|
||||
#mi.comments = self.description
|
||||
#if not isinstance(mi.comments, unicode):
|
||||
# mi.comments = mi.comments.decode('utf-8', 'replace')
|
||||
#mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
|
||||
# '\n\n'.join(article_titles))
|
||||
|
||||
# Get cover
|
||||
cpath = getattr(self, 'cover_path', None)
|
||||
if cpath is None:
|
||||
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
|
||||
if self.default_cover(pf):
|
||||
cpath = pf.name
|
||||
if cpath is not None and os.access(cpath, os.R_OK):
|
||||
opf.cover = cpath
|
||||
manifest.append(cpath)
|
||||
language = canonicalize_lang(self.language)
|
||||
if language is not None:
|
||||
mi.language = language
|
||||
# This one affects the pub date shown in kindle title
|
||||
#mi.pubdate = nowf()
|
||||
# now appears to need the time field to be > 12.00noon as well
|
||||
mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
|
||||
opf_path = os.path.join(dir, 'index.opf')
|
||||
ncx_path = os.path.join(dir, 'index.ncx')
|
||||
|
||||
# Get masthead
|
||||
mpath = getattr(self, 'masthead_path', None)
|
||||
if mpath is not None and os.access(mpath, os.R_OK):
|
||||
manifest.append(mpath)
|
||||
opf = OPFCreator(dir, mi)
|
||||
# Add mastheadImage entry to <guide> section
|
||||
mp = getattr(self, 'masthead_path', None)
|
||||
if mp is not None and os.access(mp, os.R_OK):
|
||||
from calibre.ebooks.metadata.opf2 import Guide
|
||||
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
|
||||
ref.type = 'masthead'
|
||||
ref.title = 'Masthead Image'
|
||||
opf.guide.append(ref)
|
||||
|
||||
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
|
||||
manifest.append(os.path.join(dir, 'index.html'))
|
||||
manifest.append(os.path.join(dir, 'index.ncx'))
|
||||
|
||||
# Get cover
|
||||
cpath = getattr(self, 'cover_path', None)
|
||||
if cpath is None:
|
||||
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
|
||||
if self.default_cover(pf):
|
||||
cpath = pf.name
|
||||
if cpath is not None and os.access(cpath, os.R_OK):
|
||||
opf.cover = cpath
|
||||
manifest.append(cpath)
|
||||
|
||||
# Get masthead
|
||||
mpath = getattr(self, 'masthead_path', None)
|
||||
if mpath is not None and os.access(mpath, os.R_OK):
|
||||
manifest.append(mpath)
|
||||
|
||||
opf.create_manifest_from_files_in(manifest)
|
||||
for mani in opf.manifest:
|
||||
if mani.path.endswith('.ncx'):
|
||||
mani.id = 'ncx'
|
||||
if mani.path.endswith('mastheadImage.jpg'):
|
||||
mani.id = 'masthead-image'
|
||||
|
||||
entries = ['index.html']
|
||||
toc = TOC(base_path=dir)
|
||||
self.play_order_counter = 0
|
||||
self.play_order_map = {}
|
||||
|
||||
opf.create_manifest_from_files_in(manifest)
|
||||
for mani in opf.manifest:
|
||||
if mani.path.endswith('.ncx'):
|
||||
mani.id = 'ncx'
|
||||
if mani.path.endswith('mastheadImage.jpg'):
|
||||
mani.id = 'masthead-image'
|
||||
entries = ['index.html']
|
||||
toc = TOC(base_path=dir)
|
||||
self.play_order_counter = 0
|
||||
self.play_order_map = {}
|
||||
|
||||
def feed_index(num, parent):
|
||||
f = feeds[num]
|
||||
@ -532,13 +845,16 @@ class MPRecipe(BasicNewsRecipe):
|
||||
desc = None
|
||||
else:
|
||||
desc = self.description_limiter(desc)
|
||||
tt = a.toc_thumbnail if a.toc_thumbnail else None
|
||||
entries.append('%sindex.html'%adir)
|
||||
po = self.play_order_map.get(entries[-1], None)
|
||||
if po is None:
|
||||
self.play_order_counter += 1
|
||||
po = self.play_order_counter
|
||||
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
|
||||
play_order=po, author=auth, description=desc)
|
||||
parent.add_item('%sindex.html'%adir, None,
|
||||
a.title if a.title else _('Untitled Article'),
|
||||
play_order=po, author=auth,
|
||||
description=desc, toc_thumbnail=tt)
|
||||
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
|
||||
for sp in a.sub_pages:
|
||||
prefix = os.path.commonprefix([opf_path, sp])
|
||||
@ -555,7 +871,7 @@ class MPRecipe(BasicNewsRecipe):
|
||||
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
|
||||
templ = self.navbar.generate(True, num, j, len(f),
|
||||
not self.has_single_feed,
|
||||
a.orig_url, self.publisher, prefix=prefix,
|
||||
a.orig_url, __appname__, prefix=prefix,
|
||||
center=self.center_navbar)
|
||||
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
|
||||
body.insert(len(body.contents), elem)
|
||||
@ -578,7 +894,7 @@ class MPRecipe(BasicNewsRecipe):
|
||||
if not desc:
|
||||
desc = None
|
||||
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
|
||||
f.title, play_order=po, description=desc, author=auth))
|
||||
f.title, play_order=po, description=desc, author=auth))
|
||||
|
||||
else:
|
||||
entries.append('feed_%d/index.html'%0)
|
||||
@ -592,3 +908,4 @@ class MPRecipe(BasicNewsRecipe):
|
||||
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
|
||||
opf.render(opf_file, ncx_file)
|
||||
|
||||
|
||||
|
76
recipes/money_pl.recipe
Normal file
76
recipes/money_pl.recipe
Normal file
@ -0,0 +1,76 @@
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class FocusRecipe(BasicNewsRecipe):
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = u'intromatyk <intromatyk@gmail.com>'
|
||||
language = 'pl'
|
||||
version = 1
|
||||
|
||||
title = u'Money.pl'
|
||||
category = u'News'
|
||||
description = u'Informacje finansowe z kraju i ze świata. Aktualne i archiwalne: notowania giełdowe, kursy walut, wskaźniki gospodarcze.'
|
||||
remove_empty_feeds= True
|
||||
no_stylesheets=True
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100000
|
||||
recursions = 0
|
||||
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
|
||||
simultaneous_downloads = 2
|
||||
|
||||
r = re.compile('.*(?P<url>http:\/\/(www.money.pl)|(rss.feedsportal.com\/c)\/.*\.html?).*')
|
||||
keep_only_tags =[]
|
||||
keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'artykul'}))
|
||||
remove_tags = [dict(name='ul', attrs={'class':'socialStuff'})]
|
||||
|
||||
extra_css = '''
|
||||
body {font-family: Arial,Helvetica,sans-serif ;}
|
||||
h1{text-align: left;}
|
||||
h2{font-size: medium; font-weight: bold;}
|
||||
p.lead {font-weight: bold; text-align: left;}
|
||||
.authordate {font-size: small; color: #696969;}
|
||||
.fot{font-size: x-small; color: #666666;}
|
||||
'''
|
||||
|
||||
feeds = [
|
||||
('Wiadomosci z kraju', 'http://money.pl.feedsportal.com/c/33900/f/612847/index.rss'),
|
||||
('Wiadomosci ze swiata', 'http://money.pl.feedsportal.com/c/33900/f/612848/index.rss'),
|
||||
('Gospodarka', 'http://money.pl.feedsportal.com/c/33900/f/612849/index.rss'),
|
||||
('Waluty', 'http://money.pl.feedsportal.com/c/33900/f/612850/index.rss'),
|
||||
('Gielda', 'http://money.pl.feedsportal.com/c/33900/f/612851/index.rss'),
|
||||
('Banki', 'http://money.pl.feedsportal.com/c/33900/f/612852/index.rss'),
|
||||
('Fundusze', 'http://money.pl.feedsportal.com/c/33900/f/612853/index.rss'),
|
||||
('Emerytury', 'http://money.pl.feedsportal.com/c/33900/f/612854/index.rss'),
|
||||
('Podatki', 'http://money.pl.feedsportal.com/c/33900/f/612855/index.rss'),
|
||||
('Ubezpieczenia', 'http://money.pl.feedsportal.com/c/33900/f/612856/index.rss'),
|
||||
('Poradniki', 'http://money.pl.feedsportal.com/c/33900/f/612857/index.rss'),
|
||||
('Raporty', 'http://money.pl.feedsportal.com/c/33900/f/612858/index.rss'),
|
||||
('Motoryzacja', 'http://money.pl.feedsportal.com/c/33900/f/612859/index.rss'),
|
||||
('Manager', 'http://money.pl.feedsportal.com/c/33900/f/612860/index.rss'),
|
||||
('Dla firm', 'http://money.pl.feedsportal.com/c/33900/f/612861/index.rss'),
|
||||
('Prawo', 'http://money.pl.feedsportal.com/c/33900/f/612862/index.rss'),
|
||||
('Nieruchomosci', 'http://money.pl.feedsportal.com/c/33900/f/612863/index.rss'),
|
||||
('Praca', 'http://money.pl.feedsportal.com/c/33900/f/612864/index.rss'),
|
||||
|
||||
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
if url.count ('money.pl.feedsportal.com'):
|
||||
u = url.find('0Cartykul0C')
|
||||
u = 'http://www.m.money.pl/wiadomosci/artykul/' + url[u + 21:]
|
||||
u = u.replace('0C', '/')
|
||||
u = u.replace('A', '')
|
||||
u = u.replace ('0E','-')
|
||||
u = u.replace ('0P',';')
|
||||
u = u.replace ('0H',',')
|
||||
u = u.replace ('0B','.')
|
||||
u = u.replace (',0,',',-1,')
|
||||
u = u.replace('0Tutm0Isource0Frss0Gutm0Imedium0Frss0Gutm0Icampaign0Frss/story01.htm', '')
|
||||
else:
|
||||
u = url.replace('/nc/1','/do-druku/1')
|
||||
return u
|
@ -1,9 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2009-2011, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
moneynews.newsmax.com
|
||||
www.moneynews.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
@ -12,40 +10,40 @@ class MoneyNews(BasicNewsRecipe):
|
||||
title = 'Moneynews.com'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Financial news worldwide'
|
||||
publisher = 'moneynews.com'
|
||||
language = 'en'
|
||||
|
||||
publisher = 'Newsmax.com'
|
||||
language = 'en'
|
||||
category = 'news, finances, USA, business'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'cp1252'
|
||||
encoding = 'utf8'
|
||||
extra_css = 'img{display: block} body{font-family: Arial, Helvetica, sans-serif}'
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_tables=True'
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
, 'linearize_tables' : True
|
||||
}
|
||||
|
||||
feeds = [
|
||||
(u'Street Talk' , u'http://moneynews.newsmax.com/xml/streettalk.xml' )
|
||||
,(u'Finance News' , u'http://moneynews.newsmax.com/xml/FinanceNews.xml' )
|
||||
,(u'Economy' , u'http://moneynews.newsmax.com/xml/economy.xml' )
|
||||
,(u'Companies' , u'http://moneynews.newsmax.com/xml/companies.xml' )
|
||||
,(u'Markets' , u'http://moneynews.newsmax.com/xml/Markets.xml' )
|
||||
,(u'Investing & Analysis' , u'http://moneynews.newsmax.com/xml/investing.xml' )
|
||||
(u'Street Talk' , u'http://www.moneynews.com/rss/StreetTalk/8.xml' )
|
||||
,(u'Finance News' , u'http://www.moneynews.com/rss/FinanceNews/4.xml' )
|
||||
,(u'Economy' , u'http://www.moneynews.com/rss/Economy/2.xml' )
|
||||
,(u'Companies' , u'http://www.moneynews.com/rss/Companies/6.xml' )
|
||||
,(u'Markets' , u'http://www.moneynews.com/rss/Markets/7.xml' )
|
||||
,(u'Investing & Analysis' , u'http://www.moneynews.com/rss/InvestingAnalysis/17.xml')
|
||||
]
|
||||
|
||||
|
||||
keep_only_tags = [dict(name='table', attrs={'class':'copy'})]
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'copy'})]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='td' , attrs={'id':'article_fontsize'})
|
||||
,dict(name='table', attrs={'id':'toolbox' })
|
||||
,dict(name='tr' , attrs={'id':'noprint3' })
|
||||
dict(attrs={'class':['MsoNormal', 'MsoNoSpacing']}),
|
||||
dict(name=['object','link','embed','form','meta'])
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
nodeid = url.rpartition('/')[2]
|
||||
return 'http://www.moneynews.com/PrintTemplate?nodeid=' + nodeid
|
||||
|
143
recipes/mwjournal.recipe
Normal file
143
recipes/mwjournal.recipe
Normal file
@ -0,0 +1,143 @@
|
||||
##
|
||||
## Title: Microwave Journal RSS recipe
|
||||
## Contact: Kiavash (use Mobile Read)
|
||||
##
|
||||
## License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
|
||||
## Copyright: Kiavash
|
||||
##
|
||||
## Written: Jan 2012
|
||||
## Last Edited: Jan 2012
|
||||
##
|
||||
|
||||
__license__ = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
|
||||
__copyright__ = 'Kiavash'
|
||||
__author__ = 'Kaivash'
|
||||
|
||||
'''
|
||||
Microwave Journal Monthly Magazine
|
||||
You need to sign up (free) and get username/password.
|
||||
'''
|
||||
|
||||
import re # Import the regular expressions module.
|
||||
from calibre.ptempfile import TemporaryFile # we need this for saving to a temp file
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class MWJournal(BasicNewsRecipe):
|
||||
# Title to use for the ebook.
|
||||
title = u'Microwave Journal'
|
||||
__author__ = 'Kiavash'
|
||||
language = 'en'
|
||||
|
||||
#A brief description for the ebook.
|
||||
description = u'Microwave Journal web site ebook created using rss feeds.'
|
||||
|
||||
# Set publisher and publication type.
|
||||
publisher = 'Horizon House'
|
||||
publication_type = 'magazine'
|
||||
|
||||
oldest_article = 31 # monthly published magazine. Some months are 31 days!
|
||||
max_articles_per_feed = 100
|
||||
remove_empty_feeds = True
|
||||
auto_cleanup = True
|
||||
|
||||
# Disable stylesheets and javascript from site.
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
|
||||
asciiize = True # Converts all none ascii characters to their ascii equivalents
|
||||
|
||||
needs_subscription = True # oh yeah... we need to login btw.
|
||||
|
||||
# Timeout for fetching files from the server in seconds. The default of 120 seconds, seems somewhat excessive.
|
||||
timeout = 30
|
||||
|
||||
# Specify extra CSS - overrides ALL other CSS (IE. Added last).
|
||||
|
||||
extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
|
||||
.introduction, .first { font-weight: bold; } \
|
||||
.cross-head { font-weight: bold; font-size: 125%; } \
|
||||
.cap, .caption { display: block; font-size: 80%; font-style: italic; } \
|
||||
.cap, .caption, .caption img, .caption span { display: block; margin: 5px auto; } \
|
||||
.byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, \
|
||||
.correspondent-portrait img, .byline-lead-in, .name, .bbc-role { display: block; \
|
||||
font-size: 80%; font-style: italic; margin: 1px auto; } \
|
||||
.story-date, .published { font-size: 80%; } \
|
||||
table { width: 100%; } \
|
||||
td img { display: block; margin: 5px auto; } \
|
||||
ul { padding-top: 10px; } \
|
||||
ol { padding-top: 10px; } \
|
||||
li { padding-top: 5px; padding-bottom: 5px; } \
|
||||
h1 { font-size: 175%; font-weight: bold; } \
|
||||
h2 { font-size: 150%; font-weight: bold; } \
|
||||
h3 { font-size: 125%; font-weight: bold; } \
|
||||
h4, h5, h6 { font-size: 100%; font-weight: bold; }'
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':'boxadzonearea350'}), # Removes banner ads
|
||||
dict(name='font', attrs={'class':'footer'}), # remove fonts if you do like your fonts more! Comment out to use website's fonts
|
||||
dict(name='div', attrs={'class':'newsarticlead'})
|
||||
]
|
||||
|
||||
# Remove various tag attributes to improve the look of the ebook pages.
|
||||
remove_attributes = [ 'border', 'cellspacing', 'align', 'cellpadding', 'colspan',
|
||||
'valign', 'vspace', 'hspace', 'alt', 'width', 'height' ]
|
||||
|
||||
# Remove the line breaks as well as href links. Books don't have links generally speaking
|
||||
preprocess_regexps = [(re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda m: ''),
|
||||
(re.compile(r'<br[ ]*clear.*/>', re.IGNORECASE), lambda m: ''),
|
||||
(re.compile(r'<a.*?>'), lambda h1: ''),
|
||||
(re.compile(r'</a>'), lambda h2: '')
|
||||
]
|
||||
|
||||
# Select the feeds that you are interested.
|
||||
feeds = [
|
||||
(u'Current Issue', u'http://www.mwjournal.com/rss/Rss.asp?type=99'),
|
||||
(u'Industry News', u'http://www.mwjournal.com/rss/Rss.asp?type=1'),
|
||||
(u'Resources', u'http://www.mwjournal.com/rss/Rss.asp?type=3'),
|
||||
(u'Buyer\'s Guide', u'http://www.mwjournal.com/rss/Rss.asp?type=5'),
|
||||
(u'Events', u'http://www.mwjournal.com/rss/Rss.asp?type=2'),
|
||||
(u'All Updates', u'http://www.mwjournal.com/rss/Rss.asp?type=0'),
|
||||
]
|
||||
|
||||
# No magazine is complete without cover. Let's get it then!
|
||||
# The function is adapted from the Economist recipe
|
||||
def get_cover_url(self):
|
||||
cover_url = None
|
||||
cover_page_location = 'http://www.mwjournal.com/Journal/' # Cover image is located on this page
|
||||
soup = self.index_to_soup(cover_page_location)
|
||||
cover_item = soup.find('img',attrs={'src':lambda x: x and '/IssueImg/3_MWJ_CurrIss_CoverImg' in x}) # There are three files named cover, we want the highest resolution which is the 3rd image. So we look for the pattern. Remember that the name of the cover image changes every month so we cannot search for the complete name. Instead we are searching for the pattern
|
||||
if cover_item:
|
||||
cover_url = 'http://www.mwjournal.com' + cover_item['src'].strip() # yeah! we found it. Let's fetch the image file and pass it as cover to calibre
|
||||
return cover_url
|
||||
|
||||
def print_version(self, url):
|
||||
if url.find('/Journal/article.asp?HH_ID=') >= 0:
|
||||
return self.browser.open_novisit(url).geturl().replace('/Journal/article.asp?HH_ID=', '/Journal/Print.asp?Id=')
|
||||
elif url.find('/News/article.asp?HH_ID=') >= 0:
|
||||
return self.browser.open_novisit(url).geturl().replace('/News/article.asp?HH_ID=', '/Journal/Print.asp?Id=')
|
||||
elif url.find('/Resources/TechLib.asp?HH_ID=') >= 0:
|
||||
return self.browser.open_novisit(url).geturl().replace('/Resources/TechLib.asp?HH_ID=', '/Resources/PrintRessource.asp?Id=')
|
||||
|
||||
def get_browser(self):
|
||||
'''
|
||||
Microwave Journal website, directs the login page to omeda.com once login info is submitted, omeda.com redirects to mwjournal.com with again the browser logs in into that site (hidden from the user). To overcome this obsticle, first login page is fetch and its output is stored to an HTML file. Then the HTML file is opened again and second login form is submitted (Many thanks to Barty which helped with second page login).
|
||||
'''
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
if self.username is not None and self.password is not None:
|
||||
url = ('http://www.omeda.com/cgi-win/mwjreg.cgi?m=login') # main login page.
|
||||
br.open(url) # fetch the 1st login page
|
||||
br.select_form('login') # finds the login form
|
||||
br['EMAIL_ADDRESS'] = self.username # fills the username
|
||||
br['PASSWORD'] = self.password # fills the password
|
||||
raw = br.submit().read() # submit the form and read the 2nd login form
|
||||
# save it to an htm temp file (from ESPN recipe written by Kovid Goyal kovid@kovidgoyal.net
|
||||
with TemporaryFile(suffix='.htm') as fname:
|
||||
with open(fname, 'wb') as f:
|
||||
f.write(raw)
|
||||
br.open_local_file(fname)
|
||||
br.select_form(nr=0) # finds submit on the 2nd form
|
||||
didwelogin = br.submit().read() # submit it and read the return html
|
||||
if 'Welcome ' not in didwelogin: # did it login successfully? Is Username/password correct?
|
||||
raise Exception('Failed to login, are you sure your username and password are correct?')
|
||||
#login is done
|
||||
return br
|
@ -1,14 +1,25 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
##
|
||||
## Title: New Journal of Physics
|
||||
## License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
|
||||
## Copyright: Chema Cort\xe9s
|
||||
##
|
||||
## Written: Jan 2011
|
||||
## Last Edited: Jan 2012 - by Kiavash
|
||||
##
|
||||
|
||||
__license__ = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
|
||||
__copyright__ = u'Chema Cort\xe9s - 2011-01-05'
|
||||
__version__ = 'v0.01'
|
||||
__date__ = '2011-01-05'
|
||||
__version__ = 'v0.5.0'
|
||||
__date__ = '2012-01-13'
|
||||
|
||||
'''
|
||||
njp.org
|
||||
'''
|
||||
|
||||
import re # Import the regular expressions module.
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class NewJournalOfPhysics(BasicNewsRecipe):
|
||||
@ -19,14 +30,60 @@ class NewJournalOfPhysics(BasicNewsRecipe):
|
||||
category = 'physics, journal, science'
|
||||
language = 'en'
|
||||
|
||||
oldest_article = 30
|
||||
max_articles_per_feed = 100
|
||||
|
||||
keep_only_tags = [dict(id=['fulltextContainer'])]
|
||||
no_stylesheets=True
|
||||
use_embedded_content=False
|
||||
|
||||
feeds = [(u'Latest Papers', u'http://iopscience.iop.org/1367-2630/?rss=1')]
|
||||
|
||||
cover_url = 'http://images.iop.org/journals_icons/Info/1367-2630/cover.gif'
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 30
|
||||
timeout = 30
|
||||
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
remove_javascript = True
|
||||
remove_empty_feeds = True
|
||||
asciiize = True # Converts all none ascii characters to their ascii equivalents
|
||||
|
||||
keep_only_tags = [
|
||||
dict(id=['articleEvoContainer']),
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':'affiliations'}), # Removes Shoow Affiliations
|
||||
dict(name='div', attrs={'class':'abst-icon-links'}), # Removes Tags and PDF export
|
||||
dict(name='p', attrs={'class':'studyimage'}), # remove Studay image
|
||||
dict(name='a', attrs={'class':'icon powerpoint'}), # remove Export to PowerPoint Slide
|
||||
dict(name='a', attrs={'title':'CrossRef'}), # remove CrossRef icon
|
||||
dict(name='a', attrs={'title':'PubMed'}), # remove PubMed icon
|
||||
dict(name='a', attrs={'e4f5426941':'true'}), # remove cross ref image
|
||||
dict(name='img', attrs={'src':''}), # remove empty image
|
||||
dict(name='a', attrs={'class':'closeChap'}), # remove 'Close'
|
||||
dict(name='ul', attrs={'class':'breadcrumbs'}), # remove Top breadcrumbs
|
||||
]
|
||||
|
||||
extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
|
||||
.introduction, .first { font-weight: bold; } \
|
||||
.cross-head { font-weight: bold; font-size: 125%; } \
|
||||
.cap, .caption { display: block; font-size: 80%; font-style: italic; } \
|
||||
.cap, .caption, .caption img, .caption span { display: block; margin: 5px auto; } \
|
||||
.byl, .byd, .byline img, .byline-name, .byline-title, .author-name, .author-position, \
|
||||
.correspondent-portrait img, .byline-lead-in, .name, .bbc-role { display: block; \
|
||||
font-size: 80%; font-style: italic; margin: 1px auto; } \
|
||||
.story-date, .published { font-size: 80%; } \
|
||||
table { width: 100%; } \
|
||||
td img { display: block; margin: 5px auto; } \
|
||||
ul { padding-top: 10px; } \
|
||||
ol { padding-top: 10px; } \
|
||||
li { padding-top: 5px; padding-bottom: 5px; } \
|
||||
h1 { font-size: 175%; font-weight: bold; } \
|
||||
h2 { font-size: 150%; font-weight: bold; } \
|
||||
h3 { font-size: 125%; font-weight: bold; } \
|
||||
h4, h5, h6 { font-size: 100%; font-weight: bold; }'
|
||||
|
||||
# Remove the line breaks.
|
||||
preprocess_regexps = [(re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda m: ''),
|
||||
(re.compile(r'<br[ ]*clear.*/>', re.IGNORECASE), lambda m: ''),
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
return url+"/fulltext"
|
||||
return url+"/article"
|
54
recipes/nol.recipe
Normal file
54
recipes/nol.recipe
Normal file
@ -0,0 +1,54 @@
|
||||
################################################################################
|
||||
#Description: http://nol.hu/ RSS channel
|
||||
#Author: Bigpapa (bigpapabig@hotmail.com)
|
||||
#Date: 2011.12.18. - V1.1
|
||||
################################################################################
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class NOL(BasicNewsRecipe):
|
||||
title = u'NOL'
|
||||
__author__ = 'Bigpapa'
|
||||
oldest_article = 5
|
||||
max_articles_per_feed = 5 # Az adott e-bookban tarolt cikkek feedenkenti maximalis szamat adja meg.
|
||||
no_stylesheets = True
|
||||
#delay = 1
|
||||
use_embedded_content = False
|
||||
encoding = 'utf8'
|
||||
language = 'hu'
|
||||
publication_type = 'newsportal'
|
||||
|
||||
conversion_options ={
|
||||
'linearize_tables' : True,
|
||||
}
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='table', attrs={'class':['article-box']})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
|
||||
dict(name='div', attrs={'class':['h','ad-container-outer','tags noborder','ad-container-inner','image-container-lead','tags','related-container']}),
|
||||
dict(name='h4'),
|
||||
dict(name='tfoot'),
|
||||
dict(name='td', attrs={'class':['foot']}),
|
||||
dict(name='span', attrs={'class':['image-container-caption']}),
|
||||
]
|
||||
|
||||
|
||||
feeds = [
|
||||
# (u'V\xe1logat\xe1s', 'http://nol.hu/feed/valogatas.rss'),
|
||||
(u'Belf\xf6ld', 'http://nol.hu/feed/belfold.rss'),
|
||||
(u'K\xfclf\xf6ld', 'http://nol.hu/feed/kulfold.rss'),
|
||||
(u'Gazdas\xe1g', 'http://nol.hu/feed/gazdasag.rss'),
|
||||
(u'V\xe9lem\xe9ny', 'http://nol.hu/feed/velemeny.rss'),
|
||||
(u'Kult\xfara', 'http://nol.hu/feed/kult.rss'),
|
||||
(u'Tud/Tech', 'http://nol.hu/feed/tud-tech.rss'),
|
||||
(u'Sport', 'http://nol.hu/feed/sport.rss'),
|
||||
(u'Noller', 'http://nol.hu/feed/noller.rss'),
|
||||
(u'Mozaik', 'http://nol.hu/feed/mozaik.rss'),
|
||||
(u'Utaz\xe1s', 'http://nol.hu/feed/utazas.rss'),
|
||||
(u'Aut\xf3', 'http://nol.hu/feed/auto.rss'),
|
||||
(u'Voks', 'http://nol.hu/feed/voks.rss'),
|
||||
|
||||
]
|
100
recipes/novilist_novine_hr.recipe
Normal file
100
recipes/novilist_novine_hr.recipe
Normal file
@ -0,0 +1,100 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
novine.novilist.hr
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class NoviList_hr(BasicNewsRecipe):
|
||||
title = 'Novi List'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Vijesti iz Hrvatske'
|
||||
publisher = 'NOVI LIST d.d.'
|
||||
category = 'Novi list, politika, hrvatski dnevnik, Novine, Hrvatska, Croatia, News, newspaper, Hrvatski,Primorje, dnevni list, Rijeka'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 200
|
||||
no_stylesheets = True
|
||||
encoding = 'cp1250'
|
||||
use_embedded_content = False
|
||||
language = 'hr'
|
||||
remove_empty_feeds = True
|
||||
publication_type = 'newspaper'
|
||||
needs_subscription = True
|
||||
masthead_url = 'http://novine.novilist.hr/images/system/novilist-logo.jpg'
|
||||
index = 'http://novine.novilist.hr/'
|
||||
extra_css = """
|
||||
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
|
||||
body{font-family: Geneva,Arial,Helvetica,Swiss,sans1,sans-serif }
|
||||
img{display:block; margin-bottom: 0.4em; margin-top: 0.4em}
|
||||
.nadnaslov,.podnaslov{font-size: small; display: block; margin-bottom: 1em}
|
||||
.naslov{font-size: x-large; color: maroon; font-weight: bold; display: block; margin-bottom: 1em;}
|
||||
p{display: block}
|
||||
"""
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
, 'linearize_tables' : True
|
||||
}
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='td', attrs={'class':['nadnaslov', 'naslov', 'podnaslov']}),
|
||||
dict(name='font', attrs={'face':'Geneva,Arial,Helvetica,Swiss'})
|
||||
]
|
||||
|
||||
remove_tags = [dict(name=['meta', 'link', 'iframe', 'embed', 'object'])]
|
||||
remove_attributes=['border', 'lang', 'size', 'face', 'bgcolor']
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open(self.index + 'loginnow.asp')
|
||||
br.select_form(nr=0)
|
||||
br['username'] = self.username
|
||||
br['password'] = self.password
|
||||
br.submit()
|
||||
return br
|
||||
|
||||
def parse_index(self):
|
||||
articles = []
|
||||
count = 0
|
||||
soup = self.index_to_soup(self.index)
|
||||
#cover url
|
||||
for alink in soup.findAll('a'):
|
||||
if alink['href'].startswith('images/clanci/DOC_'):
|
||||
self.cover_url = self.index + alink['href']
|
||||
#feeds
|
||||
for item in soup.findAll('td',attrs={'class':'tocrubrika'}):
|
||||
count = count +1
|
||||
if self.test and count > 2:
|
||||
return articles
|
||||
aitem = item.a
|
||||
section = self.tag_to_string(aitem)
|
||||
feedlink = self.index + aitem['href']
|
||||
feedpage = self.index_to_soup(feedlink)
|
||||
self.report_progress(0, _('Fetching feed')+' %s...'%(section))
|
||||
inarts = []
|
||||
for alink in feedpage.findAll('a',attrs={'class':'naslovlinkdesno'}):
|
||||
url = self.index + alink['href']
|
||||
inarts.append({
|
||||
'title' :self.tag_to_string(alink)
|
||||
,'date' :strftime(self.timefmt)
|
||||
,'url' :url
|
||||
,'description':''
|
||||
})
|
||||
if self.remove_empty_feeds:
|
||||
if inarts:
|
||||
articles.append((section,inarts))
|
||||
else:
|
||||
articles.append((section,inarts))
|
||||
return articles
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace('?WCI=Rubrike&','?WCI=Pretrazivac&')
|
49
recipes/novilist_portal_hr.recipe
Normal file
49
recipes/novilist_portal_hr.recipe
Normal file
@ -0,0 +1,49 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.novilist.hr
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class NoviList_Portal_hr(BasicNewsRecipe):
|
||||
title = 'Novi List - online portal'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Portal Novog Lista'
|
||||
publisher = 'NOVI LIST d.d.'
|
||||
category = 'Novi list, politika, hrvatski dnevnik, Novine, Hrvatska, Croatia, News, newspaper, Hrvatski,Primorje, dnevni list, Rijeka'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 200
|
||||
no_stylesheets = True
|
||||
encoding = 'utf8'
|
||||
use_embedded_content = False
|
||||
language = 'hr'
|
||||
publication_type = 'newsportal'
|
||||
masthead_url = 'http://www.novilist.hr/design/novilist/images/logo-print.gif'
|
||||
extra_css = """
|
||||
body{font-family: Geneva,Arial,Helvetica,Swiss,sans-serif }
|
||||
h1{font-family: Georgia,serif}
|
||||
img{display:block; margin-bottom: 0.4em; margin-top: 0.4em}
|
||||
"""
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
, 'linearize_tables' : True
|
||||
}
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'content'})]
|
||||
|
||||
remove_tags = [dict(name=['meta', 'link', 'iframe', 'embed', 'object'])]
|
||||
remove_attributes=['border', 'lang']
|
||||
|
||||
feeds = [(u'Vijesti', u'http://www.novilist.hr/rss/feed/sve.xml')]
|
||||
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace('http://www.novilist.hr/','http://www.novilist.hr/layout/set/print/')
|
@ -325,7 +325,8 @@ class NYTimes(BasicNewsRecipe):
|
||||
'''
|
||||
def get_the_soup(docEncoding, url_or_raw, raw=False) :
|
||||
if re.match(r'\w+://', url_or_raw):
|
||||
f = self.browser.open(url_or_raw)
|
||||
br = self.clone_browser(self.browser)
|
||||
f = br.open_novisit(url_or_raw)
|
||||
_raw = f.read()
|
||||
f.close()
|
||||
if not _raw:
|
||||
|
@ -364,7 +364,8 @@ class NYTimes(BasicNewsRecipe):
|
||||
'''
|
||||
def get_the_soup(docEncoding, url_or_raw, raw=False) :
|
||||
if re.match(r'\w+://', url_or_raw):
|
||||
f = self.browser.open(url_or_raw)
|
||||
br = self.clone_browser(self.browser)
|
||||
f = br.open_novisit(url_or_raw)
|
||||
_raw = f.read()
|
||||
f.close()
|
||||
if not _raw:
|
||||
|
77
recipes/opinion_bo.recipe
Normal file
77
recipes/opinion_bo.recipe
Normal file
@ -0,0 +1,77 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Piet van Oostrum <piet@vanoostrum.org>'
|
||||
'''
|
||||
www.opinion.com.bo
|
||||
'''
|
||||
|
||||
from calibre import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Opinion_Bol(BasicNewsRecipe):
|
||||
title = u'Opinión - Bolivia'
|
||||
__author__ = 'Piet van Oostrum'
|
||||
description = u'Opinión diario de circulación nacional, Cochabamba, Bolivia'
|
||||
publisher = 'Coboce Ltda - Editora Opinión'
|
||||
category = 'news, politics, Bolivia'
|
||||
version = 1
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 20
|
||||
no_stylesheets = True
|
||||
encoding = 'utf-8'
|
||||
use_embedded_content = False
|
||||
language = 'es_BO'
|
||||
publication_type = 'newspaper'
|
||||
delay = 1
|
||||
remove_empty_feeds = True
|
||||
|
||||
cover_url = strftime('http://www.opinion.com.bo/opinion/articulos/%Y/%m%d/fotos/portada_650.jpg')
|
||||
masthead_url = 'http://opinion.com.bo/opinion/articulos/imagenes/logo_opinion.gif'
|
||||
extra_css = """body{font-family: Helvetica,Arial,sans-serif}
|
||||
.seccion_encabezado_nota_inte{font-size: 1.1em;
|
||||
font-weight: bold;}
|
||||
.autor_nota_inte{color: #999999; font-size: 0.8em;
|
||||
margin-bottom: 0.5em; text-align: right;}
|
||||
.pie{font-size: 0.8em;}"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'columna_izq_nota_intererior'})]
|
||||
|
||||
remove_tags = [dict(name=['meta','link','form','iframe','embed','object','style']),
|
||||
dict(name='div', attrs={'class':'ocultar'})]
|
||||
remove_attributes = ['width','height']
|
||||
|
||||
feeds = [
|
||||
(u'El País' , u'http://www.opinion.com.bo/opinion/rss/el_pais_rss.xml' )
|
||||
,(u'Cochabamba' , u'http://www.opinion.com.bo/opinion/rss/cochabamba_rss.xml' )
|
||||
,(u'Economía' , u'http://www.opinion.com.bo/opinion/rss/economia_rss.xml' )
|
||||
,(u'Cultura' , u'http://www.opinion.com.bo/opinion/rss/cultura_rss.xml' )
|
||||
,(u'Mundo' , u'http://www.opinion.com.bo/opinion/rss/mundo_rss.xml' )
|
||||
,(u'Ciencia y Tecnología', u'http://www.opinion.com.bo/opinion/rss/ciencia_tecnologia_rss.xml' )
|
||||
,(u'Policial' , u'http://www.opinion.com.bo/opinion/rss/policial_rss.xml' )
|
||||
,(u'Editorial' , u'http://www.opinion.com.bo/opinion/rss/editorial_rss.xml' )
|
||||
,(u'Subeditorial' , u'http://www.opinion.com.bo/opinion/rss/subeditorial_rss.xml' )
|
||||
,(u'Opinión' , u'http://www.opinion.com.bo/opinion/rss/opinion_rss.xml' )
|
||||
,(u'Deportes' , u'http://www.opinion.com.bo/opinion/rss/deportes_rss.xml')
|
||||
,(u' Vida de hoy' , u'http://www.opinion.com.bo/opinion/rss/vidadehoy_rss.xml' )
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
# Filter out today's articles
|
||||
# maybe should take timezone into account
|
||||
|
||||
today = strftime('/%Y/%m%d/')
|
||||
def get_article_url(self, article):
|
||||
link = article.link
|
||||
if self.today in link:
|
||||
return link
|
196
recipes/oreilly_premium.recipe
Normal file
196
recipes/oreilly_premium.recipe
Normal file
@ -0,0 +1,196 @@
|
||||
# Talking Points is not grabbing everything.
|
||||
# The look is right, but only the last one added?
|
||||
import re
|
||||
import time
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
# Allows the Python soup converter, which makes parsing easier.
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
# strip ads and graphics
|
||||
# Current Column lacks a title.
|
||||
# Talking Points Memo - shorten title - Remove year and Bill's name
|
||||
# The News letter archive https://www.billoreilly.com/newsletterarchive is covered by other entries.
|
||||
# Newsletters: Talking Points Memos covered by cat12
|
||||
|
||||
class OReillyPremium(BasicNewsRecipe):
|
||||
title = u'OReilly Premium'
|
||||
__author__ = 'TMcN'
|
||||
description = 'Retrieves Premium and News Letter content from BillOReilly.com. Requires a Bill OReilly Premium Membership.'
|
||||
cover_url = 'http://images.billoreilly.com/images/headers/billgray_header.png'
|
||||
auto_cleanup = True
|
||||
encoding = 'utf8'
|
||||
needs_subscription = True
|
||||
no_stylesheets = True
|
||||
oldest_article = 20
|
||||
remove_javascript = True
|
||||
remove_tags = [dict(name='img', attrs={})]
|
||||
# Don't go down
|
||||
recursions = 0
|
||||
max_articles_per_feed = 2000
|
||||
|
||||
debugMessages = True
|
||||
|
||||
# Name, URL, Soup FindAll Attr if relevant (last two are special case), articleList
|
||||
catList = [ ["TV Archives", 'https://www.billoreilly.com/show?action=tvShowArchive', 'a', {'class':['showLinks','homeLinks']}, []],
|
||||
["No Spin Archives", 'https://www.billoreilly.com/blog?categoryID=7', True, {'class':['blogBody'], 'style':['padding-top:10px;']}, []],
|
||||
["Daily Briefings", 'http://www.billoreilly.com/blog?categoryID=11', True, {'class':['defaultHeaderSmallLinks']}, []],
|
||||
["Stratfor", 'http://www.billoreilly.com/blog?categoryID=5', 'a', {'class':['blogLinks']}, []],
|
||||
["Talking Points Memo", 'https://www.billoreilly.com/blog?categoryID=12', 'td', {}, []],
|
||||
["Current Column", 'https://www.billoreilly.com/currentcolumn', 'span', {'class':['defaultHeader']}, []]
|
||||
]
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open('https://www.billoreilly.com/pg/jsp/member/membersignin.jsp')
|
||||
br.select_form(name='login')
|
||||
br['formEmailField'] = self.username
|
||||
br['formPasswordField'] = self.password
|
||||
br.submit()
|
||||
return br
|
||||
|
||||
# Returns the best-guess print url.
|
||||
# The second parameter (pageURL) is returned if nothing is found.
|
||||
def extractPrintURL(self, baseURL, pageURL, printString):
|
||||
tagURL = pageURL
|
||||
soup = self.index_to_soup(pageURL)
|
||||
if soup :
|
||||
printText = soup.find('a', text=printString)
|
||||
else :
|
||||
print("Failed to find Print string "+printString+ " in "+pageURL)
|
||||
if printText:
|
||||
tag = printText.parent
|
||||
tagURL = baseURL+tag['href']
|
||||
return tagURL
|
||||
|
||||
def stripBadChars(self, inString) :
|
||||
return inString.replace("\'", "")
|
||||
|
||||
def parseGeneric(self, baseURL):
|
||||
# Does a generic parsing of the articles. There are six categories (0-5)
|
||||
# Name, URL, Soup FindAll Attr if relevant (last two are special case), articleList
|
||||
# NoSpin and TV are generic
|
||||
fullReturn = []
|
||||
for i in range(len(self.catList)) :
|
||||
articleList = []
|
||||
soup = self.index_to_soup(self.catList[i][1])
|
||||
# Set defaults
|
||||
description = 'None'
|
||||
pubdate = time.strftime('%a, %d %b')
|
||||
# Problem: 0-2 create many in an array
|
||||
# 3-5 create one.
|
||||
# So no for-div for 3-5
|
||||
|
||||
if i < 3 :
|
||||
for div in soup.findAll(self.catList[i][2], self.catList[i][3]):
|
||||
print(div)
|
||||
if i == 1:
|
||||
a = div.find('a', href=True)
|
||||
else :
|
||||
a = div
|
||||
print(a)
|
||||
summary = div.find(True, attrs={'class':'summary'})
|
||||
if summary:
|
||||
description = self.tag_to_string(summary, use_alt=False)
|
||||
if not a:
|
||||
continue
|
||||
# url = baseURL+re.sub(r'\?.*', '', a['href'])
|
||||
url = baseURL+a['href']
|
||||
if i < 2 :
|
||||
url = self.extractPrintURL(baseURL, url, "Print this entry")
|
||||
title = self.tag_to_string(a, use_alt=True).strip()
|
||||
elif i == 2 :
|
||||
# Daily Briefs
|
||||
url = self.extractPrintURL(baseURL, url, "Print this entry")
|
||||
title = div.contents[0]
|
||||
if self.debugMessages :
|
||||
print(title+" @ "+url)
|
||||
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
|
||||
|
||||
elif i == 3 : # Stratfor
|
||||
a = soup.find('a', self.catList[i][3])
|
||||
if a is None :
|
||||
continue
|
||||
url = baseURL+a['href']
|
||||
title = self.tag_to_string(a, use_alt=True).strip()
|
||||
# Get Stratfor contents so we can get the real title.
|
||||
stratSoup = self.index_to_soup(url)
|
||||
title = stratSoup.html.head.title.string
|
||||
stratIndex = title.find('Stratfor.com:', 0)
|
||||
if (stratIndex > -1) :
|
||||
title = title[stratIndex+14:-1]
|
||||
# Look for first blogBody <td class="blogBody"
|
||||
# Changed 12 Jan 2012 - new page format
|
||||
#stratBlogTable = stratSoup.find('td', {'class':['blogBody']}).findParent('table')
|
||||
#stratBody = stratSoup.find('td', {'class':['blogBody']})
|
||||
elif i == 4 : # Talking Points
|
||||
topDate = soup.find("td", "blogBody")
|
||||
if not topDate :
|
||||
print("Failed to find date in Talking Points")
|
||||
# This page has the contents in double-wrapped tables!
|
||||
myTable = topDate.findParents('table')[0]
|
||||
if myTable is not None:
|
||||
upOneTable = myTable.findParents('table')[0]
|
||||
if upOneTable is not None:
|
||||
upTwo = upOneTable.findParents('table')[0]
|
||||
if upTwo is None:
|
||||
continue
|
||||
# Now navigate rows of upTwo
|
||||
if self.debugMessages :
|
||||
print("Entering rows")
|
||||
for rows in upTwo.findChildren("tr", recursive=False):
|
||||
# Inside top level table, each row is an article
|
||||
rowTable = rows.find("table")
|
||||
articleTable = rowTable.find("table")
|
||||
# This looks wrong.
|
||||
articleTable = rows.find("tr")
|
||||
# The middle table is just for formatting the article buffer... but this means we can skip the inner table.
|
||||
blogDate = articleTable.find("a","blogDate").contents[0]
|
||||
# Skip to second blogBody for this.
|
||||
blogTitle = articleTable.findAll("td", "blogBody")[1].contents[0]
|
||||
blogURL = articleTable.find("a", "homeBlogReadMore bold")['href']
|
||||
url = baseURL+re.sub(r'\?.*', '', blogURL)
|
||||
title = blogDate+": "+self.stripBadChars(blogTitle.replace("Bill O'Reilly: ", ""))
|
||||
if self.debugMessages :
|
||||
print("Talking Points Memo title "+title+" at url: "+url)
|
||||
pubdate = time.strftime('%a, %d %b')
|
||||
articleList.append(dict(title=title, url=url, date=pubdate, description='None', content=''))
|
||||
else : # Current Column
|
||||
titleSpan = soup.find(self.catList[i][2], self.catList[i][3])
|
||||
if titleSpan is None :
|
||||
continue
|
||||
title = titleSpan.contents[0]
|
||||
url = self.extractPrintURL(baseURL, self.catList[i][1], "Print This Article")
|
||||
if i == 3 or i == 5 :
|
||||
if self.debugMessages :
|
||||
print(self.catList[i][0]+" Title:"+title+" at url: "+url)
|
||||
summary = div.find(True, attrs={'class':'summary'})
|
||||
if summary:
|
||||
description = self.tag_to_string(summary, use_alt=False)
|
||||
articleList.append(dict(title=title, url=url, date=pubdate, description=description, content=''))
|
||||
self.catList[i][3] = articleList
|
||||
fullReturn.append((self.catList[i][0], articleList))
|
||||
return fullReturn
|
||||
|
||||
# calibre.web.feeds.news.BasicNewsRecipe.parse_index() fetches the list of articles.
|
||||
# returns a list of tuple ('feed title', list of articles)
|
||||
# {
|
||||
# 'title' : article title,
|
||||
# 'url' : URL of print version,
|
||||
# 'date' : The publication date of the article as a string,
|
||||
# 'description' : A summary of the article
|
||||
# 'content' : The full article (can be an empty string). This is used by FullContentProfile
|
||||
# }
|
||||
# this is used instead of BasicNewsRecipe.parse_feeds().
|
||||
def parse_index(self):
|
||||
# Parse the page into Python Soup
|
||||
baseURL = "https://www.billoreilly.com"
|
||||
return self.parseGeneric(baseURL)
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
refresh = soup.find('meta', {'http-equiv':'refresh'})
|
||||
if refresh is None:
|
||||
return soup
|
||||
content = refresh.get('content').partition('=')[2]
|
||||
raw = self.browser.open('https://www.billoreilly.com'+content).read()
|
||||
return BeautifulSoup(raw.decode('cp1252', 'replace'))
|
||||
|
@ -1,12 +1,10 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
pagina12.com.ar
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
|
||||
class Pagina12(BasicNewsRecipe):
|
||||
title = 'Pagina - 12'
|
||||
@ -66,9 +64,7 @@ class Pagina12(BasicNewsRecipe):
|
||||
return url.replace('http://www.pagina12.com.ar/','http://www.pagina12.com.ar/imprimir/')
|
||||
|
||||
def get_cover_url(self):
|
||||
rawc = self.index_to_soup('http://www.pagina12.com.ar/diario/principal/diario/index.html',True)
|
||||
rawc2 = re.sub(r'PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN','PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"',rawc)
|
||||
soup = BeautifulSoup(rawc2,fromEncoding=self.encoding,smartQuotesTo=None)
|
||||
soup = self.index_to_soup('http://www.pagina12.com.ar/diario/principal/diario/index.html')
|
||||
for image in soup.findAll('img',alt=True):
|
||||
if image['alt'].startswith('Tapa de la fecha'):
|
||||
return image['src']
|
||||
|
14
recipes/pambianco.recipe
Normal file
14
recipes/pambianco.recipe
Normal file
@ -0,0 +1,14 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1326135591(BasicNewsRecipe):
|
||||
title = u'Pambianco'
|
||||
description = 'fashion magazine for professional people'
|
||||
language = 'it'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
|
||||
feeds = [(u'Pambianco', u'http://feeds.feedburner.com/pambianconews/YGXu')]
|
||||
__author__ = 'faber1971'
|
||||
__version__ = 'v1.0'
|
||||
__date__ = '9, January 2011'
|
@ -33,3 +33,6 @@ class BasicUserRecipe1314970845(BasicNewsRecipe):
|
||||
(u'Obituaries', u'http://www.philly.com/inquirer_obituaries.rss')
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
return url + '?viewAll=y'
|
||||
|
||||
|
79
recipes/prospectmaguk.recipe
Normal file
79
recipes/prospectmaguk.recipe
Normal file
@ -0,0 +1,79 @@
|
||||
#!/usr/bin/env python
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
'''
|
||||
calibre recipe for prospectmagazine.co.uk (subscription)
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class ProspectMagUK(BasicNewsRecipe):
|
||||
title = u'Prospect Magazine'
|
||||
description = 'A general-interest publication offering analysis and commentary about politics, news and business.'
|
||||
__author__ = 'barty, duluoz'
|
||||
timefmt = ' [%d %B %Y]'
|
||||
no_stylesheets = True
|
||||
publication_type = 'magazine'
|
||||
masthead_url = 'http://www.prospectmagazine.co.uk/wp-content/themes/prospect/images/titleMain.jpg'
|
||||
category = 'news, UK'
|
||||
language = 'en_GB'
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
needs_subscription = True
|
||||
|
||||
auto_cleanup_keep = '//div[@class="lead_image"]'
|
||||
remove_tags = [{'class':['shareinpost','postutils','postinfo']}]
|
||||
|
||||
INDEX = 'http://www.prospectmagazine.co.uk/current-issue'
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
if self.username is not None and self.password is not None:
|
||||
br.open('http://www.prospectmagazine.co.uk/wp-login.php')
|
||||
br.select_form(name='loginform')
|
||||
br['log'] = self.username
|
||||
br['pwd'] = self.password
|
||||
br.submit()
|
||||
return br
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.index_to_soup(self.INDEX)
|
||||
#div = soup.find('h1',text=re.compile(r'Issue \d+'))
|
||||
#fname = self.tag_to_string( div) if div is not None else 'Current Issue'
|
||||
div = soup.find('div', id='cover_image')
|
||||
if div is not None:
|
||||
img = div.find('img', src=True)
|
||||
if img is not None:
|
||||
src = img['src']
|
||||
if src.startswith('/'):
|
||||
src = 'http://www.prospectmagazine.co.uk' + src
|
||||
self.cover_url = src
|
||||
feeds = []
|
||||
# loop through sections
|
||||
for sect in soup.findAll('div',attrs={'class':'sectionheading'}):
|
||||
fname = self.tag_to_string( sect).replace('>','').strip()
|
||||
self.log('Found section', fname)
|
||||
articles = []
|
||||
|
||||
# note: can't just find siblings with class='post' because that will also
|
||||
# grab all the articles belonging to the sections that follow.
|
||||
for item in sect.findNextSiblings('div',attrs={'class':True}):
|
||||
if not 'post' in item['class']: break
|
||||
a = item.find('a', href=True)
|
||||
if a is None: continue
|
||||
url = a['href']
|
||||
title = self.tag_to_string(a)
|
||||
p = item.find('p')
|
||||
desc = self.tag_to_string( p) if p is not None else ''
|
||||
art = {'title':title, 'description':desc,'date':' ', 'url':url}
|
||||
p = item.find(attrs={'class':re.compile('author')})
|
||||
self.log('\tFound article:', title, '::', url)
|
||||
if p is not None:
|
||||
art['author'] = self.tag_to_string( p).strip()
|
||||
articles.append(art)
|
||||
|
||||
feeds.append((fname, articles))
|
||||
return feeds
|
65
recipes/rionegro.recipe
Normal file
65
recipes/rionegro.recipe
Normal file
@ -0,0 +1,65 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.rionegro.com.ar
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class RioNegro(BasicNewsRecipe):
|
||||
title = 'Diario Rio Negro'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'Noticias desde la Patagonia Argentina y el resto del mundo'
|
||||
publisher = 'Editorial Rio Negro SA.'
|
||||
category = 'news, politics, Argentina'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 200
|
||||
no_stylesheets = True
|
||||
encoding = 'utf8'
|
||||
use_embedded_content = False
|
||||
language = 'es_AR'
|
||||
remove_empty_feeds = True
|
||||
publication_type = 'newspaper'
|
||||
masthead_url = 'http://www.rionegro.com.ar/diario/imagenes/logorn.gif'
|
||||
extra_css = """
|
||||
body{font-family: Arial,Helvetica,sans-serif }
|
||||
img{display:block}
|
||||
h1 {font-size: 0.89em; color: red}
|
||||
h2 {font-family: Georgia,"Times New Roman",Times,serif; font-size: 1.8em}
|
||||
h3 {font-family: Georgia,"Times New Roman",Times,serif; border-bottom: 2px solid gray}
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['meta','link','iframe','object','embed'])
|
||||
,dict(name='div', attrs={'class':'logo'})
|
||||
]
|
||||
keep_only_tags=[dict(attrs={'class':'nota'})]
|
||||
remove_attributes=['lang']
|
||||
|
||||
|
||||
feeds = [
|
||||
(u'Argentina' , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9532')
|
||||
,(u'El Mundo' , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9533')
|
||||
,(u'Carta de lectores', u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9538')
|
||||
,(u'Columnistas' , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9539')
|
||||
,(u'Domingo a Domingo', u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9541')
|
||||
,(u'Editorial' , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9542')
|
||||
,(u'Deportes' , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9522')
|
||||
,(u'Espectaculos' , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9523')
|
||||
,(u'Sociedad' , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9574')
|
||||
,(u'Policiales' , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9525')
|
||||
,(u'Municipales' , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9862')
|
||||
,(u'Region' , u'http://www.rionegro.com.ar/diario/funciones/xml/rss.aspx?idcat=9701')
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
idart_raw = url.rpartition('idart=')[2]
|
||||
idart = idart_raw.rpartition('&')[0]
|
||||
return 'http://www.rionegro.com.ar/diario/rn/print.aspx?idArt=' + idart + '&tipo=2'
|
17
recipes/salonica_press_news.recipe
Normal file
17
recipes/salonica_press_news.recipe
Normal file
@ -0,0 +1,17 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class spn(BasicNewsRecipe):
|
||||
title = u'Salonica Press News'
|
||||
language = 'gr'
|
||||
__author__ = "SteliosGero"
|
||||
oldest_article = 3
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
category = 'news, GR'
|
||||
language = 'el'
|
||||
|
||||
|
||||
feeds = [(u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03ba\u03ae', u'http://www.spnews.gr/politiki?format=feed&type=rss'), (u'\u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1', u'http://www.spnews.gr/oikonomia?format=feed&type=rss'), (u'\u0391\u03c5\u03c4\u03bf\u03b4\u03b9\u03bf\u03af\u03ba\u03b7\u03c3\u03b7', u'http://www.spnews.gr/aftodioikisi?format=feed&type=rss'), (u'\u039a\u03bf\u03b9\u03bd\u03c9\u03bd\u03af\u03b1', u'http://www.spnews.gr/koinonia?format=feed&type=rss'), (u'\u0391\u03b8\u03bb\u03b7\u03c4\u03b9\u03c3\u03bc\u03cc\u03c2', u'http://www.spnews.gr/sports?format=feed&type=rss'), (u'\u0394\u03b9\u03b5\u03b8\u03bd\u03ae', u'http://www.spnews.gr/diethni?format=feed&type=rss'), (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03c3\u03bc\u03cc\u03c2', u'http://www.spnews.gr/politismos?format=feed&type=rss'), (u'Media', u'http://www.spnews.gr/media-news?format=feed&type=rss'), (u'\u0396\u03c9\u03ae', u'http://www.spnews.gr/zoi?format=feed&type=rss'), (u'\u03a4\u03b5\u03c7\u03bd\u03bf\u03bb\u03bf\u03b3\u03af\u03b1', u'http://spnews.gr/texnologia?format=feed&type=rss'), (u'\u03a0\u03b5\u03c1\u03b9\u03b2\u03ac\u03bb\u03bb\u03bf\u03bd', u'http://spnews.gr/periballon?format=feed&type=rss'), (u'\u03a0\u03b1\u03c1\u03b1\u03c0\u03bf\u03bb\u03b9\u03c4\u03b9\u03ba\u03ac', u'http://spnews.gr/parapolitika?format=feed&type=rss'), (u'\u03a0\u03b1\u03c1\u03b1\u03b4\u03b7\u03bc\u03bf\u03c4\u03b9\u03ba\u03ac', u'http://spnews.gr/paradimotika?format=feed&type=rss'), (u'\u03a0\u03b1\u03c1\u03b1\u03b1\u03b8\u03bb\u03b7\u03c4\u03b9\u03ba\u03ac', u'http://spnews.gr/parathlitika?format=feed&type=rss'), (u'\u0391\u03c0\u03cc\u03c8\u03b5\u03b9\u03c2', u'http://spnews.gr/apopseis?format=feed&type=rss'), (u'\u03a3\u03c5\u03bd\u03b5\u03cd\u03be\u03b5\u03b9\u03c2', u'http://spnews.gr/synenteykseis?format=feed&type=rss'), (u'Alert!', u'http://spnews.gr/alert?format=feed&type=rss')]
|
||||
|
||||
def print_version(self, url):
|
||||
return url+'?tmpl=component&print=1&layout=default&page='
|
@ -12,6 +12,7 @@ class MercuryNews(BasicNewsRecipe):
|
||||
title = 'San Jose Mercury News'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'News from San Jose'
|
||||
cover_url = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg12/lg/CA_SJMN.jpg'
|
||||
publisher = 'San Jose Mercury News'
|
||||
category = 'news, politics, USA, San Jose, California'
|
||||
oldest_article = 2
|
||||
|
@ -12,6 +12,7 @@ class SeattleTimes(BasicNewsRecipe):
|
||||
title = 'The Seattle Times'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'News from Seattle and USA'
|
||||
cover_url = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg12/lg/WA_ST.jpg'
|
||||
publisher = 'The Seattle Times'
|
||||
category = 'news, politics, USA'
|
||||
oldest_article = 2
|
||||
@ -20,6 +21,8 @@ class SeattleTimes(BasicNewsRecipe):
|
||||
use_embedded_content = False
|
||||
encoding = 'cp1252'
|
||||
language = 'en'
|
||||
auto_cleanup = True
|
||||
auto_cleanup_keep = '//div[@id="PhotoContainer"]'
|
||||
|
||||
feeds = [
|
||||
(u'Top Stories',
|
||||
@ -69,24 +72,4 @@ class SeattleTimes(BasicNewsRecipe):
|
||||
u'http://seattletimes.nwsource.com/rss/mostreadarticles.xml'),
|
||||
]
|
||||
|
||||
keep_only_tags = [dict(id='content')]
|
||||
remove_tags = [
|
||||
dict(name=['object','link','script']),
|
||||
{'class':['permission', 'note', 'bottomtools',
|
||||
'homedelivery']},
|
||||
dict(id=["rightcolumn", 'footer', 'adbottom']),
|
||||
]
|
||||
|
||||
def print_version(self, url):
|
||||
return url
|
||||
start_url, sep, rest_url = url.rpartition('_')
|
||||
rurl, rsep, article_id = start_url.rpartition('/')
|
||||
return u'http://seattletimes.nwsource.com/cgi-bin/PrintStory.pl?document_id=' + article_id
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
mtag = '<meta http-equiv="Content-Language" content="en-US"/>'
|
||||
soup.head.insert(0,mtag)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
return soup
|
||||
|
||||
|
491
recipes/singtaohk.recipe
Normal file
491
recipes/singtaohk.recipe
Normal file
@ -0,0 +1,491 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Eddie Lau'
|
||||
|
||||
# data source: normal, mobile
|
||||
__Source__ = 'mobile'
|
||||
# please replace the following "True" with "False". (Default: True)
|
||||
__MakePeriodical__ = True
|
||||
# Turn below to True if your device supports display of CJK titles (Default: False)
|
||||
__UseChineseTitle__ = False
|
||||
# Set it to False if you want to skip images (Default: True)
|
||||
__KeepImages__ = True
|
||||
# Set it to True if you want to include a summary in Kindle's article view (Default: False)
|
||||
__IncludeSummary__ = False
|
||||
# Set it to True if you want thumbnail images in Kindle's article view (Default: True)
|
||||
__IncludeThumbnails__ = True
|
||||
|
||||
|
||||
'''
|
||||
Change Log:
|
||||
2011/12/29 -- first version done
|
||||
TODO:
|
||||
* use alternative source at http://m.singtao.com/index.php
|
||||
'''
|
||||
|
||||
from calibre.utils.date import now as nowf
|
||||
import os, datetime, re
|
||||
from datetime import date
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from contextlib import nested
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.ebooks.metadata.toc import TOC
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre.utils.localization import canonicalize_lang
|
||||
|
||||
# MAIN CLASS
|
||||
class STHKRecipe(BasicNewsRecipe):
|
||||
if __UseChineseTitle__ == True:
|
||||
title = u'\u661f\u5cf6\u65e5\u5831 (\u9999\u6e2f)'
|
||||
else:
|
||||
title = 'Sing Tao Daily - Hong Kong'
|
||||
description = 'Hong Kong Chinese Newspaper (http://singtao.com)'
|
||||
category = 'Chinese, News, Hong Kong'
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px; max-height:90%;} td[class=caption] {font-size:50%;} td[class=bodyhead]{font-weight:bold; font-size:150%;} td[class=stmobheadline]{font-weight:bold; font-size:150%;}'
|
||||
masthead_url = 'http://upload.wikimedia.org/wikipedia/en/d/dd/Singtao-usa.png'
|
||||
if __Source__ == 'normal':
|
||||
keep_only_tags = [dict(name='td', attrs={'class':['bodyhead','bodytext']})]
|
||||
else:
|
||||
keep_only_tags = [dict(name='td', attrs={'class':['stmobheadline']}),
|
||||
dict(name='img', attrs={'width':['146']}),
|
||||
dict(name='td', attrs={'class':['bodytextg']}),
|
||||
]
|
||||
if __KeepImages__:
|
||||
remove_tags = [dict(name='hr')]
|
||||
else:
|
||||
remove_tags = [dict(name='hr'), dict(name='img')]
|
||||
remove_attributes = ['align']
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'<font class="bodytext">', re.DOTALL|re.IGNORECASE),
|
||||
lambda match: '<br><br><font class="bodytext">'),
|
||||
]
|
||||
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 200
|
||||
__author__ = 'Eddie Lau'
|
||||
publisher = 'Sing Tao Ltd.'
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
language = 'zh'
|
||||
encoding = 'Big5-HKSCS'
|
||||
recursions = 0
|
||||
conversion_options = {'linearize_tables':True}
|
||||
timefmt = ''
|
||||
auto_cleanup = False
|
||||
|
||||
def get_dtlocal(self):
|
||||
dt_utc = datetime.datetime.utcnow()
|
||||
# convert UTC to local hk time - at HKT 4.00am, all news are available
|
||||
dt_local = dt_utc + datetime.timedelta(8.0/24) - datetime.timedelta(4.0/24)
|
||||
return dt_local
|
||||
|
||||
def get_fetchdate(self):
|
||||
return self.get_dtlocal().strftime("%Y%m%d")
|
||||
|
||||
def get_fetchformatteddate(self):
|
||||
return self.get_dtlocal().strftime("%Y-%m-%d")
|
||||
|
||||
def get_fetchyear(self):
|
||||
return self.get_dtlocal().strftime("%Y")
|
||||
|
||||
def get_fetchmonth(self):
|
||||
return self.get_dtlocal().strftime("%m")
|
||||
|
||||
def get_fetchday(self):
|
||||
return self.get_dtlocal().strftime("%d")
|
||||
|
||||
def get_cover_url(self):
|
||||
#cover = 'http://singtao.com/media/a/a(2660).jpg' # for 2011/12/29
|
||||
base = 2660
|
||||
todaydate = date(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()))
|
||||
diff = todaydate - date(2011, 12, 29)
|
||||
base = base + int(diff.total_seconds()/(3600*24))
|
||||
cover = 'http://singtao.com/media/a/a(' + str(base) +').jpg'
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
try:
|
||||
br.open(cover)
|
||||
except:
|
||||
cover = 'http://singtao.com/images/stlogo.gif'
|
||||
return cover
|
||||
|
||||
def parse_index(self):
|
||||
feeds = []
|
||||
dateStr = self.get_fetchdate()
|
||||
dateStr
|
||||
|
||||
if __Source__ == 'normal':
|
||||
# single-item section
|
||||
for title, url in [(u'\u793e\u8ad6 Editorial', 'http://singtao.com/yesterday/jou/j_index.html')]:
|
||||
article = self.parse_singleitem_section(url)
|
||||
if article:
|
||||
feeds.append((title, article))
|
||||
|
||||
# multiple items
|
||||
# for title, url in [(u'\u8981\u805e\u6e2f\u805e Local', 'http://singtao.com/yesterday/loc/a_index.html'),
|
||||
# (u'\u8ca1\u7d93 Finance', 'http://singtao.com/yesterday/fin/d_index.html'),
|
||||
# (u'\u5730\u7522 Properties', 'http://singtao.com/yesterday/pro/h_index.html'),
|
||||
# (u'\u6559\u80b2 Education', 'http://singtao.com/yesterday/edu/g_index.asp'),
|
||||
# (u'\u5a1b\u6a02 Entertainment', 'http://singtao.com/yesterday/ent/f_index.html'),
|
||||
# (u'\u9ad4\u80b2 Sports', 'http://singtao.com/yesterday/spo/c_index.html'),
|
||||
# (u'\u99ac\u7d93 Horse Racing', 'http://singtao.com/yesterday/rac/n_index.html')
|
||||
# ]:
|
||||
# articles = self.parse_section(url)
|
||||
# if articles:
|
||||
# feeds.append((title, articles))
|
||||
|
||||
# special: supplement
|
||||
# for title, url, baseurl in [(u'\u526f\u520a Supplements', 'http://singtao.com/yesterday/sup/m_index.html', '/')]:
|
||||
# articles = self.parse_section_withouttext(url, baseurl)
|
||||
# if articles:
|
||||
# feeds.append((title, articles))
|
||||
|
||||
# multiple-item sections
|
||||
# for title, url in [(u'\u570b\u969b World', 'http://singtao.com/yesterday/int/b_index.html'),
|
||||
# (u'\u4e2d\u570b China', 'http://singtao.com/yesterday/chi/e_index.html')
|
||||
# ]:
|
||||
# articles = self.parse_section(url)
|
||||
# if articles:
|
||||
# feeds.append((title, articles))
|
||||
|
||||
for title, url, baseurl in [(u'\u8981\u805e\u6e2f\u805e Local', 'http://singtao.com/yesterday/loc/a_index.html', '/'),
|
||||
(u'\u8ca1\u7d93 Finance', 'http://singtao.com/yesterday/fin/d_index.html', '/'),
|
||||
(u'\u5730\u7522 Properties', 'http://singtao.com/yesterday/pro/h_index.html', '/'),
|
||||
(u'\u6559\u80b2 Education', 'http://singtao.com/yesterday/edu/g_index.asp', '/'),
|
||||
(u'\u5a1b\u6a02 Entertainment', 'http://singtao.com/yesterday/ent/f_index.html', '/'),
|
||||
(u'\u9ad4\u80b2 Sports', 'http://singtao.com/yesterday/spo/c_index.html', '/'),
|
||||
(u'\u99ac\u7d93 Horse Racing', 'http://singtao.com/yesterday/rac/n_index.html', '/'),
|
||||
(u'\u526f\u520a Supplements', 'http://singtao.com/yesterday/sup/m_index.html', '/'),
|
||||
(u'\u570b\u969b World', 'http://singtao.com/yesterday/int/b_index.html', '/'),
|
||||
(u'\u4e2d\u570b China', 'http://singtao.com/yesterday/chi/e_index.html', '/')]:
|
||||
articles = self.parse_section_withouttext(url, baseurl)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
else: # use mobile
|
||||
# single-item section
|
||||
for title, url in [(u'\u793e\u8ad6 Editorial', 'http://m.singtao.com/showContent.php?main=paper&sub=0&title=0')]:
|
||||
article = self.parse_singleitem_section_m(url)
|
||||
if article:
|
||||
feeds.append((title, article))
|
||||
# multiple-item section
|
||||
for title, url, baseurl in [(u'\u8981\u805e\u6e2f\u805e Local', 'http://m.singtao.com/showTitle.php?main=paper&sub=1', 'http://m.singtao.com/'),
|
||||
(u'\u8ca1\u7d93 Finance', 'http://m.singtao.com/showTitle.php?main=paper&sub=2', 'http://m.singtao.com/'),
|
||||
(u'\u5730\u7522 Properties', 'http://m.singtao.com/showTitle.php?main=paper&sub=3', 'http://m.singtao.com/'),
|
||||
(u'\u6559\u80b2 Education', 'http://m.singtao.com/showTitle.php?main=paper&sub=4', 'http://m.singtao.com/'),
|
||||
(u'\u5a1b\u6a02 Entertainment', 'http://m.singtao.com/showTitle.php?main=paper&sub=5', 'http://m.singtao.com/'),
|
||||
(u'\u99ac\u7d93 Horse Racing', 'http://m.singtao.com/showTitle.php?main=paper&sub=6', 'http://m.singtao.com/'),
|
||||
(u'\u9ad4\u80b2 Sports', 'http://m.singtao.com/showTitle.php?main=paper&sub=7', 'http://m.singtao.com/'),
|
||||
(u'\u526f\u520a Supplements', 'http://m.singtao.com/showTitle.php?main=paper&sub=8', 'http://m.singtao.com/'),
|
||||
(u'\u570b\u969b World', 'http://m.singtao.com/showTitle.php?main=paper&sub=9', 'http://m.singtao.com/'),
|
||||
(u'\u4e2d\u570b China', 'http://m.singtao.com/showTitle.php?main=paper&sub=10', 'http://m.singtao.com/')]:
|
||||
articles = self.parse_multiitem_section_m(url, baseurl)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
return feeds
|
||||
|
||||
def parse_singleitem_section(self, url):
|
||||
current_articles = []
|
||||
current_articles.append({'title': '', 'url': url, 'description': '', 'date': ''})
|
||||
return current_articles
|
||||
|
||||
def parse_singleitem_section_m(self, url):
|
||||
current_articles = []
|
||||
current_articles.append({'title': '', 'url': url, 'description': '', 'date': ''})
|
||||
return current_articles
|
||||
|
||||
def parse_section(self, url):
|
||||
soup = self.index_to_soup(url)
|
||||
# find <table width=436 border=0 cellspacing=0 align=center cellpadding=0> tag
|
||||
tables = soup.findAll(name={'table'}, attrs={'width': ['436']})
|
||||
current_articles_all = []
|
||||
for table in tables:
|
||||
divs = table.findAll(name={'a'})
|
||||
current_articles = []
|
||||
included_urls = []
|
||||
for i in divs:
|
||||
title = self.tag_to_string(i)
|
||||
urlstr = i.get('href', False)
|
||||
urlstr = url + '/../' + urlstr
|
||||
if urlstr not in included_urls:
|
||||
current_articles.append({'title': title, 'url': urlstr, 'description': '', 'date': ''})
|
||||
included_urls.append(urlstr)
|
||||
current_articles_all.extend(current_articles)
|
||||
return current_articles_all
|
||||
|
||||
def parse_section_withouttext(self, url, baseurl):
|
||||
soup = self.index_to_soup(url)
|
||||
# find all a tag
|
||||
links = soup.findAll(name={'a'})
|
||||
linksexcluded = soup.findAll(name={'a'}, attrs={'class':'secondhead'})
|
||||
for elink in linksexcluded:
|
||||
links.remove(elink)
|
||||
linksexcluded = soup.findAll(name={'a'}, attrs={'class':'second02'})
|
||||
for elink in linksexcluded:
|
||||
links.remove(elink)
|
||||
current_articles_all = []
|
||||
included_urls = []
|
||||
for link in links:
|
||||
title = self.tag_to_string(link)
|
||||
if len(title.strip()) > 0:
|
||||
urlstr = link.get('href', False)
|
||||
if urlstr.rfind(baseurl) == -1 and urlstr.rfind('mailto:') == -1:
|
||||
urlstr = url + '/../' + urlstr
|
||||
if urlstr not in included_urls:
|
||||
current_articles_all.append({'title': title, 'url': urlstr, 'description': '', 'date': ''})
|
||||
included_urls.append(urlstr)
|
||||
return current_articles_all
|
||||
|
||||
def parse_multiitem_section_m(self, url, baseurl):
|
||||
soup = self.index_to_soup(url)
|
||||
# find all a tag
|
||||
links = soup.findAll(name={'span'}, attrs={'class':'urlurl'})
|
||||
current_articles_all = []
|
||||
included_urls = []
|
||||
for linkraw in links:
|
||||
linkclean = soup.findAll(name={'a'})
|
||||
for link in linkclean:
|
||||
title = self.tag_to_string(link)
|
||||
if len(title.strip()) > 0:
|
||||
urlstr = link.get('href', False)
|
||||
urlstr = baseurl + urlstr
|
||||
if urlstr not in included_urls:
|
||||
current_articles_all.append({'title': title, 'url': urlstr, 'description': '', 'date': ''})
|
||||
included_urls.append(urlstr)
|
||||
return current_articles_all
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
|
||||
if __Source__ == 'normal':
|
||||
# get title if not fetched in parse_section() function
|
||||
if article.title == '' or len(article.title.strip()) == 0:
|
||||
articletitle = soup.findAll('td',attrs={'class':'bodyhead'})
|
||||
if articletitle:
|
||||
articletitlemod = articletitle[0].find('font')
|
||||
if articletitlemod:
|
||||
article.title = articletitlemod.string.strip()
|
||||
else:
|
||||
article.title = articletitle[0].string.strip()
|
||||
else:
|
||||
# use the title in the text in any case
|
||||
articletitle = soup.findAll('td', attrs={'class':'stmobheadline'})
|
||||
if articletitle:
|
||||
articletitle[0].br.extract()
|
||||
article.title = articletitle[0].contents[0]
|
||||
# get thumbnail image
|
||||
if __IncludeThumbnails__ and first and hasattr(self, 'add_toc_thumbnail'):
|
||||
img = soup.find('img')
|
||||
if img is not None:
|
||||
self.add_toc_thumbnail(article, img['src'])
|
||||
|
||||
try:
|
||||
if __IncludeSummary__ and len(article.text_summary.strip()) == 0:
|
||||
# look for content
|
||||
if __Source__ == 'normal':
|
||||
articlebodies = soup.findAll('font',attrs={'class':'bodytext'})
|
||||
else:
|
||||
articlebodies = soup.findAll('div', attrs={'class':'hkadj'})
|
||||
if articlebodies:
|
||||
for articlebody in articlebodies:
|
||||
if articlebody:
|
||||
# the text may or may not be enclosed in <p></p> tag
|
||||
paras = articlebody.findAll('p')
|
||||
if not paras:
|
||||
paras = articlebody
|
||||
textFound = False
|
||||
for p in paras:
|
||||
if not textFound:
|
||||
summary_candidate = self.tag_to_string(p).strip()
|
||||
if len(summary_candidate) > 0:
|
||||
summary_candidate = summary_candidate.replace(u'(\u661f\u5cf6\u65e5\u5831\u5831\u9053)', '', 1)
|
||||
article.summary = article.text_summary = summary_candidate
|
||||
textFound = True
|
||||
else:
|
||||
# display a simple text
|
||||
#article.summary = article.text_summary = u'\u66f4\u591a......'
|
||||
# display word counts
|
||||
counts = 0
|
||||
if __Source__ == 'normal':
|
||||
articlebodies = soup.findAll('font',attrs={'class':'bodytext'})
|
||||
else:
|
||||
articlebodies = soup.findAll('div', attrs={'class':'hkadj'})
|
||||
if articlebodies:
|
||||
for articlebody in articlebodies:
|
||||
# the text may or may not be enclosed in <p></p> tag
|
||||
paras = articlebody.findAll('p')
|
||||
if not paras:
|
||||
paras = articlebody
|
||||
for p in paras:
|
||||
summary_candidate = self.tag_to_string(p).strip()
|
||||
counts += len(summary_candidate)
|
||||
article.summary = article.text_summary = u'\uff08' + str(counts) + u'\u5b57\uff09'
|
||||
except:
|
||||
self.log("Error creating article descriptions")
|
||||
return
|
||||
|
||||
# override from the one in version 0.8.31
|
||||
def create_opf(self, feeds, dir=None):
|
||||
if dir is None:
|
||||
dir = self.output_dir
|
||||
title = self.short_title()
|
||||
# change 1: allow our own flag to tell if a periodical is to be generated
|
||||
# also use customed date instead of current time
|
||||
if __MakePeriodical__ == False or self.output_profile.periodical_date_in_title:
|
||||
title = title + ' ' + self.get_fetchformatteddate()
|
||||
# end of change 1
|
||||
# change 2: __appname__ replaced by newspaper publisher
|
||||
__appname__ = self.publisher
|
||||
mi = MetaInformation(title, [__appname__])
|
||||
mi.publisher = __appname__
|
||||
mi.author_sort = __appname__
|
||||
# change 3: use __MakePeriodical__ flag to tell if a periodical should be generated
|
||||
if __MakePeriodical__ == True:
|
||||
mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
|
||||
else:
|
||||
mi.publication_type = self.publication_type+':'+self.short_title()
|
||||
#mi.publication_type = 'periodical:'+self.publication_type+':'+self.short_title()
|
||||
# change 4: in the following, all the nowf() are changed to adjusted time
|
||||
# This one doesn't matter
|
||||
mi.timestamp = nowf()
|
||||
# change 5: skip listing the articles
|
||||
#article_titles, aseen = [], set()
|
||||
#for f in feeds:
|
||||
# for a in f:
|
||||
# if a.title and a.title not in aseen:
|
||||
# aseen.add(a.title)
|
||||
# article_titles.append(force_unicode(a.title, 'utf-8'))
|
||||
|
||||
#mi.comments = self.description
|
||||
#if not isinstance(mi.comments, unicode):
|
||||
# mi.comments = mi.comments.decode('utf-8', 'replace')
|
||||
#mi.comments += ('\n\n' + _('Articles in this issue: ') + '\n' +
|
||||
# '\n\n'.join(article_titles))
|
||||
|
||||
language = canonicalize_lang(self.language)
|
||||
if language is not None:
|
||||
mi.language = language
|
||||
# This one affects the pub date shown in kindle title
|
||||
#mi.pubdate = nowf()
|
||||
# now appears to need the time field to be > 12.00noon as well
|
||||
mi.pubdate = datetime.datetime(int(self.get_fetchyear()), int(self.get_fetchmonth()), int(self.get_fetchday()), 12, 30, 0)
|
||||
opf_path = os.path.join(dir, 'index.opf')
|
||||
ncx_path = os.path.join(dir, 'index.ncx')
|
||||
|
||||
opf = OPFCreator(dir, mi)
|
||||
# Add mastheadImage entry to <guide> section
|
||||
mp = getattr(self, 'masthead_path', None)
|
||||
if mp is not None and os.access(mp, os.R_OK):
|
||||
from calibre.ebooks.metadata.opf2 import Guide
|
||||
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
|
||||
ref.type = 'masthead'
|
||||
ref.title = 'Masthead Image'
|
||||
opf.guide.append(ref)
|
||||
|
||||
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
|
||||
manifest.append(os.path.join(dir, 'index.html'))
|
||||
manifest.append(os.path.join(dir, 'index.ncx'))
|
||||
|
||||
# Get cover
|
||||
cpath = getattr(self, 'cover_path', None)
|
||||
if cpath is None:
|
||||
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
|
||||
if self.default_cover(pf):
|
||||
cpath = pf.name
|
||||
if cpath is not None and os.access(cpath, os.R_OK):
|
||||
opf.cover = cpath
|
||||
manifest.append(cpath)
|
||||
|
||||
# Get masthead
|
||||
mpath = getattr(self, 'masthead_path', None)
|
||||
if mpath is not None and os.access(mpath, os.R_OK):
|
||||
manifest.append(mpath)
|
||||
|
||||
opf.create_manifest_from_files_in(manifest)
|
||||
for mani in opf.manifest:
|
||||
if mani.path.endswith('.ncx'):
|
||||
mani.id = 'ncx'
|
||||
if mani.path.endswith('mastheadImage.jpg'):
|
||||
mani.id = 'masthead-image'
|
||||
|
||||
entries = ['index.html']
|
||||
toc = TOC(base_path=dir)
|
||||
self.play_order_counter = 0
|
||||
self.play_order_map = {}
|
||||
|
||||
|
||||
def feed_index(num, parent):
|
||||
f = feeds[num]
|
||||
for j, a in enumerate(f):
|
||||
if getattr(a, 'downloaded', False):
|
||||
adir = 'feed_%d/article_%d/'%(num, j)
|
||||
auth = a.author
|
||||
if not auth:
|
||||
auth = None
|
||||
desc = a.text_summary
|
||||
if not desc:
|
||||
desc = None
|
||||
else:
|
||||
desc = self.description_limiter(desc)
|
||||
tt = a.toc_thumbnail if a.toc_thumbnail else None
|
||||
entries.append('%sindex.html'%adir)
|
||||
po = self.play_order_map.get(entries[-1], None)
|
||||
if po is None:
|
||||
self.play_order_counter += 1
|
||||
po = self.play_order_counter
|
||||
parent.add_item('%sindex.html'%adir, None,
|
||||
a.title if a.title else ('Untitled Article'),
|
||||
play_order=po, author=auth,
|
||||
description=desc, toc_thumbnail=tt)
|
||||
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
|
||||
for sp in a.sub_pages:
|
||||
prefix = os.path.commonprefix([opf_path, sp])
|
||||
relp = sp[len(prefix):]
|
||||
entries.append(relp.replace(os.sep, '/'))
|
||||
last = sp
|
||||
|
||||
if os.path.exists(last):
|
||||
with open(last, 'rb') as fi:
|
||||
src = fi.read().decode('utf-8')
|
||||
soup = BeautifulSoup(src)
|
||||
body = soup.find('body')
|
||||
if body is not None:
|
||||
prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
|
||||
templ = self.navbar.generate(True, num, j, len(f),
|
||||
not self.has_single_feed,
|
||||
a.orig_url, __appname__, prefix=prefix,
|
||||
center=self.center_navbar)
|
||||
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
|
||||
body.insert(len(body.contents), elem)
|
||||
with open(last, 'wb') as fi:
|
||||
fi.write(unicode(soup).encode('utf-8'))
|
||||
if len(feeds) == 0:
|
||||
raise Exception('All feeds are empty, aborting.')
|
||||
|
||||
if len(feeds) > 1:
|
||||
for i, f in enumerate(feeds):
|
||||
entries.append('feed_%d/index.html'%i)
|
||||
po = self.play_order_map.get(entries[-1], None)
|
||||
if po is None:
|
||||
self.play_order_counter += 1
|
||||
po = self.play_order_counter
|
||||
auth = getattr(f, 'author', None)
|
||||
if not auth:
|
||||
auth = None
|
||||
desc = getattr(f, 'description', None)
|
||||
if not desc:
|
||||
desc = None
|
||||
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
|
||||
f.title, play_order=po, description=desc, author=auth))
|
||||
|
||||
else:
|
||||
entries.append('feed_%d/index.html'%0)
|
||||
feed_index(0, toc)
|
||||
|
||||
for i, p in enumerate(entries):
|
||||
entries[i] = os.path.join(dir, p.replace('/', os.sep))
|
||||
opf.create_spine(entries)
|
||||
opf.set_toc(toc)
|
||||
|
||||
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
|
||||
opf.render(opf_file, ncx_file)
|
||||
|
||||
|
||||
|
12
recipes/sivil_dusunce.recipe
Normal file
12
recipes/sivil_dusunce.recipe
Normal file
@ -0,0 +1,12 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class BasicUserRecipe1324913680(BasicNewsRecipe):
|
||||
title = u'Sivil Dusunce'
|
||||
language = 'tr'
|
||||
__author__ = 'asalet_r'
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 20
|
||||
auto_cleanup = True
|
||||
|
||||
feeds = [(u'Sivil Dusunce', u'http://www.sivildusunce.com/feed/')]
|
@ -14,6 +14,7 @@ class TagesspiegelRSS(BasicNewsRecipe):
|
||||
language = 'de'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
publication_type = 'newspaper'
|
||||
|
||||
extra_css = '''
|
||||
.hcf-overline{color:#990000; font-family:Arial,Helvetica,sans-serif;font-size:xx-small;display:block}
|
||||
@ -33,17 +34,15 @@ class TagesspiegelRSS(BasicNewsRecipe):
|
||||
no_javascript = True
|
||||
remove_empty_feeds = True
|
||||
encoding = 'utf-8'
|
||||
remove_tags = [{'class':'hcf-header'}, {'class':'hcf-atlas'}, {'class':'hcf-date hcf-separate'}]
|
||||
|
||||
keep_only_tags = dict(name='div', attrs={'class':["hcf-article"]})
|
||||
remove_tags = [
|
||||
dict(name='link'), dict(name='iframe'),dict(name='style'),dict(name='meta'),dict(name='button'),
|
||||
dict(name='div', attrs={'class':["hcf-jump-to-comments","hcf-clear","hcf-magnify hcf-media-control",
|
||||
"hcf-socials-widgets hcf-socials-top","hcf-socials-widgets hcf-socials-bottom"] }),
|
||||
dict(name='span', attrs={'class':["hcf-mainsearch",] }),
|
||||
dict(name='ul', attrs={'class':["hcf-tools"]}),
|
||||
dict(name='ul', attrs={'class': re.compile('hcf-services')})
|
||||
]
|
||||
def print_version(self, url):
|
||||
url = url.split('/')
|
||||
url[-1] = 'v_print,%s?p='%url[-1]
|
||||
return '/'.join(url)
|
||||
|
||||
def get_masthead_url(self):
|
||||
return 'http://www.tagesspiegel.de/images/tsp_logo/3114/6.png'
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.index_to_soup('http://www.tagesspiegel.de/zeitung/')
|
||||
@ -56,7 +55,7 @@ class TagesspiegelRSS(BasicNewsRecipe):
|
||||
ans = []
|
||||
maincol = soup.find('div', attrs={'class':re.compile('hcf-main-col')})
|
||||
|
||||
for div in maincol.findAll(True, attrs={'class':['hcf-teaser', 'hcf-header', 'story headline']}):
|
||||
for div in maincol.findAll(True, attrs={'class':['hcf-teaser', 'hcf-header', 'story headline', 'hcf-teaser hcf-last']}):
|
||||
|
||||
if div['class'] == 'hcf-header':
|
||||
try:
|
||||
@ -66,7 +65,7 @@ class TagesspiegelRSS(BasicNewsRecipe):
|
||||
except:
|
||||
continue
|
||||
|
||||
elif div['class'] == 'hcf-teaser' and getattr(div.contents[0],'name','') == 'h2':
|
||||
elif div['class'] in ['hcf-teaser', 'hcf-teaser hcf-last'] and getattr(div.contents[0],'name','') == 'h2':
|
||||
a = div.find('a', href=True)
|
||||
if not a:
|
||||
continue
|
||||
|
12
recipes/tasfiye_dergisi.recipe
Normal file
12
recipes/tasfiye_dergisi.recipe
Normal file
@ -0,0 +1,12 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class BasicUserRecipe1324739957(BasicNewsRecipe):
|
||||
title = u'Tasfiye Dergisi'
|
||||
language = 'tr'
|
||||
__author__ = 'asalet_r'
|
||||
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 20
|
||||
auto_cleanup = True
|
||||
|
||||
feeds = [(u'Tasfiye Dergisi', u'http://www.tasfiyedergisi.com/direnen-edebiyat/?feed=rss2')]
|
25
recipes/tillsonburg.recipe
Normal file
25
recipes/tillsonburg.recipe
Normal file
@ -0,0 +1,25 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
'''
|
||||
Tillsonburg/Norfolk County newspapers Calibre Recipe
|
||||
'''
|
||||
class TillsonburgNorfolkCounty(BasicNewsRecipe):
|
||||
title = u'Tillsonburg/Norfolk County'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
__author__ = u'Eric Coolman'
|
||||
publisher = u'canoe.ca'
|
||||
description = u'Norfolk County and Tillsonburg, Ontario Canada Newspapers'
|
||||
category = u'News, Ontario, Canada'
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
no_stylesheets = True
|
||||
language = 'en_CA'
|
||||
encoding = 'utf-8'
|
||||
|
||||
feeds = [
|
||||
(u'Simcoe Reformer', u'http://www.simcoereformer.ca/rss/'),
|
||||
(u'Delhi News-Record', u'http://www.delhinewsrecord.com/rss/'),
|
||||
(u'Tilsonburg News', u'http://www.tillsonburgnews.com/rss/')
|
||||
]
|
@ -1,4 +1,4 @@
|
||||
import re
|
||||
import re, urllib
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class TimesOfIndia(BasicNewsRecipe):
|
||||
@ -17,7 +17,9 @@ class TimesOfIndia(BasicNewsRecipe):
|
||||
]
|
||||
remove_tags = [
|
||||
{'class':re.compile('tabsintbgshow|prvnxtbg')},
|
||||
{'id':['fbrecommend', 'relmaindiv']}
|
||||
{'id':['fbrecommend', 'relmaindiv', 'shretxt', 'fbrecos', 'twtdiv',
|
||||
'gpls', 'auim']},
|
||||
{'class':['twitter-share-button', 'cmtmn']},
|
||||
]
|
||||
|
||||
feeds = [
|
||||
@ -46,25 +48,27 @@ class TimesOfIndia(BasicNewsRecipe):
|
||||
]
|
||||
|
||||
def get_article_url(self, article):
|
||||
# Times of India sometimes serves an ad page instead of the article,
|
||||
# this code, detects and circumvents that
|
||||
url = BasicNewsRecipe.get_article_url(self, article)
|
||||
if '/0Ltimesofindia' in url:
|
||||
url = url.partition('/0L')[-1]
|
||||
url = url.replace('0B', '.').replace('0N', '.com').replace('0C',
|
||||
'/').replace('0E', '-')
|
||||
url = 'http://' + url.rpartition('/')[0]
|
||||
match = re.search(r'/([0-9a-zA-Z]+?)\.cms', url)
|
||||
if match is not None:
|
||||
num = match.group(1)
|
||||
num = re.sub(r'[^0-9]', '', num)
|
||||
return ('http://timesofindia.indiatimes.com/articleshow/%s.cms?prtpage=1' %
|
||||
num)
|
||||
else:
|
||||
cms = re.search(r'/(\d+)\.cms', url)
|
||||
if cms is not None:
|
||||
return ('http://timesofindia.indiatimes.com/articleshow/%s.cms?prtpage=1' %
|
||||
cms.group(1))
|
||||
try:
|
||||
s = article.summary
|
||||
return urllib.unquote(
|
||||
re.search(r'href=".+?bookmark.cfm.+?link=(.+?)"', s).group(1))
|
||||
except:
|
||||
pass
|
||||
link = article.get('link', None)
|
||||
if link and link.split('/')[-1]=="story01.htm":
|
||||
link=link.split('/')[-2]
|
||||
encoding = {'0B': '.', '0C': '/', '0A': '0', '0F': '=', '0G': '&',
|
||||
'0D': '?', '0E': '-', '0N': '.com', '0L': 'http://'}
|
||||
for k, v in encoding.iteritems():
|
||||
link = link.replace(k, v)
|
||||
return link
|
||||
|
||||
return url
|
||||
def print_version(self, url):
|
||||
return url + '?prtpage=1'
|
||||
|
||||
def preprocess_html(self, soup, *args):
|
||||
byl = soup.find(attrs={'class':'byline'})
|
||||
if byl is not None:
|
||||
for l in byl.findAll('label'):
|
||||
l.extract()
|
||||
return soup
|
||||
|
66
recipes/tweakers_net.recipe
Normal file
66
recipes/tweakers_net.recipe
Normal file
@ -0,0 +1,66 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import with_statement
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Tweakers(BasicNewsRecipe):
|
||||
title = u'Tweakers.net - with Reactions'
|
||||
__author__ = 'Roedi06'
|
||||
language = 'nl'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
cover_url = 'http://img51.imageshack.us/img51/7470/tweakersnetebook.gif'
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'columnwrapper news'}),
|
||||
{'id':'reacties'},
|
||||
]
|
||||
|
||||
remove_tags = [dict(name='div', attrs={'id' : ['utracker']}),
|
||||
{'id' : ['channelNav']},
|
||||
{'id' : ['contentArea']},
|
||||
{'class' : ['breadCrumb']},
|
||||
{'class' : ['nextPrevious ellipsis']},
|
||||
{'class' : ['advertorial']},
|
||||
{'class' : ['sidebar']},
|
||||
{'class' : ['filterBox']},
|
||||
{'id' : ['toggleButtonTxt']},
|
||||
{'id' : ['socialButtons']},
|
||||
{'class' : ['button']},
|
||||
{'class' : ['textadTop']},
|
||||
{'class' : ['commentLink']},
|
||||
{'title' : ['Reageer op deze reactie']},
|
||||
{'class' : ['pageIndex']},
|
||||
{'class' : ['reactieHeader collapsed']},
|
||||
]
|
||||
no_stylesheets=True
|
||||
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'<hr*?>', re.IGNORECASE | re.DOTALL), lambda match : ''),
|
||||
(re.compile(r'<p>', re.IGNORECASE | re.DOTALL), lambda match : ''),
|
||||
(re.compile(r'</p>', re.IGNORECASE | re.DOTALL), lambda match : ''),
|
||||
(re.compile(r'<a.*?>'), lambda h1: '<b><u>'),
|
||||
(re.compile(r'</a>'), lambda h2: '</u></b>'),
|
||||
(re.compile(r'<span class="new">', re.IGNORECASE | re.DOTALL), lambda match : ''),
|
||||
(re.compile(r'</span>', re.IGNORECASE | re.DOTALL), lambda match : ''),
|
||||
(re.compile(r'<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_0'), lambda match : ' - moderated 0<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_0'),
|
||||
(re.compile(r'<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_1'), lambda match : ' - moderated +1<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_1'),
|
||||
(re.compile(r'<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_2'), lambda match : ' - moderated +2<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_2'),
|
||||
(re.compile(r'<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_3'), lambda match : ' - moderated +3<div class="moderation"><img src="http://tweakimg.net/g/if/comments/score_3'),
|
||||
(re.compile(r'<div class="moderation">.*?</div>'), lambda h1: ''),
|
||||
]
|
||||
|
||||
extra_css = '.reactieHeader { color: #333333; font-size: 6px; border-bottom:solid 2px #333333; border-top:solid 1px #333333; } \
|
||||
.reactieContent { font-family:"Times New Roman",Georgia,Serif; color: #000000; font-size: 8px; } \
|
||||
.quote { font-family:"Times New Roman",Georgia,Serif; padding-left:2px; border-left:solid 3px #666666; color: #666666; }'
|
||||
|
||||
|
||||
feeds = [(u'Tweakers.net', u'http://feeds.feedburner.com/tweakers/nieuws')]
|
||||
|
||||
def print_version(self, url):
|
||||
return url + '?max=200'
|
||||
|
@ -11,8 +11,9 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class USAToday(BasicNewsRecipe):
|
||||
|
||||
title = 'USA Today'
|
||||
__author__ = 'calibre'
|
||||
__author__ = 'Kovid Goyal'
|
||||
description = 'newspaper'
|
||||
cover_url = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg12/lg/USAT.jpg'
|
||||
encoding = 'utf-8'
|
||||
publisher = 'usatoday.com'
|
||||
category = 'news, usa'
|
||||
@ -47,32 +48,7 @@ class USAToday(BasicNewsRecipe):
|
||||
('Offbeat News', 'http://rssfeeds.usatoday.com/UsatodaycomOffbeat-TopStories')
|
||||
]
|
||||
|
||||
keep_only_tags = [dict(attrs={'class':'story'})]
|
||||
|
||||
remove_tags = [
|
||||
dict(attrs={'class':[
|
||||
'share',
|
||||
'reprints',
|
||||
'inline-h3',
|
||||
'info-extras rounded',
|
||||
'inset',
|
||||
'ppy-outer',
|
||||
'ppy-caption',
|
||||
'comments',
|
||||
'jump',
|
||||
'pagetools',
|
||||
'post-attributes',
|
||||
'tags',
|
||||
'bottom-tools',
|
||||
'sponsoredlinks',
|
||||
'corrections'
|
||||
]}),
|
||||
dict(name='ul', attrs={'class':'inside-copy'}),
|
||||
dict(id=['pluck']),
|
||||
dict(id=['updated']),
|
||||
dict(id=['post-date-updated'])
|
||||
]
|
||||
|
||||
auto_cleanup = True
|
||||
|
||||
def get_masthead_url(self):
|
||||
masthead = 'http://i.usatoday.net/mobile/_common/_images/565x73_usat_mobile.gif'
|
||||
|
@ -1,5 +1,5 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2009-2012, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
www.variety.com
|
||||
'''
|
||||
@ -14,11 +14,11 @@ class Variety(BasicNewsRecipe):
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
encoding = 'cp1252'
|
||||
encoding = 'utf8'
|
||||
publisher = 'Red Business Information'
|
||||
category = 'Entertainment Industry News, Daily Variety, Movie Reviews, TV, Awards, Oscars, Cannes, Box Office, Hollywood'
|
||||
language = 'en'
|
||||
masthead_url = 'http://a330.g.akamai.net/7/330/23382/20090528190853/www.variety.com/graphics/variety/Variety_logo_green_tm.gif'
|
||||
masthead_url = 'http://images1.variety.com/graphics/variety/Variety_logo_green_tm.gif'
|
||||
extra_css = ' body{font-family: Georgia,"Times New Roman",Times,Courier,serif } img{margin-bottom: 1em} '
|
||||
|
||||
conversion_options = {
|
||||
@ -30,17 +30,10 @@ class Variety(BasicNewsRecipe):
|
||||
|
||||
remove_tags = [dict(name=['object','link','map'])]
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'article'})]
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'art control'})]
|
||||
|
||||
feeds = [(u'News & Articles', u'http://feeds.feedburner.com/variety/headlines' )]
|
||||
|
||||
def print_version(self, url):
|
||||
rpt = url.rpartition('?')[0]
|
||||
artid = rpt.rpartition('/')[2]
|
||||
catidr = url.rpartition('categoryid=')[2]
|
||||
catid = catidr.partition('&')[0]
|
||||
return 'http://www.variety.com/index.asp?layout=print_story&articleid=' + artid + '&categoryid=' + catid
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
return self.adeify_images(soup)
|
||||
rpt = url.rpartition('.html')[0]
|
||||
return rpt + '?printerfriendly=true'
|
||||
|
46
recipes/villagevoice.recipe
Normal file
46
recipes/villagevoice.recipe
Normal file
@ -0,0 +1,46 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class VillageVoice(BasicNewsRecipe):
|
||||
|
||||
title = 'Village Voice'
|
||||
feeds = [
|
||||
("Complete Issue", "http://villagevoice.com/syndication/issue"),
|
||||
("News", "http://villagevoice.com/syndication/section/news"),
|
||||
("Music", "http://villagevoice.com/syndication/section/music"),
|
||||
("Movies", "http://villagevoice.com/syndication/section/film"),
|
||||
#("Restaurants", "http://villagevoice.com/syndication/section/dining"),
|
||||
#("Music Events", "http://villagevoice.com/syndication/events?type=music"),
|
||||
#("Calendar Events", "http://villagevoice.com/syndication/events"),
|
||||
#("Promotional Events", "http://villagevoice.com/syndication/promoEvents"),
|
||||
#("Restaurant Guide", "http://villagevoice.com/syndication/restaurants/search")
|
||||
]
|
||||
|
||||
auto_cleanup = True
|
||||
max_articles_per_feed = 50
|
||||
masthead_url = "http://assets.villagevoice.com/img/citylogo.png"
|
||||
language = 'en'
|
||||
__author__ = 'Barty'
|
||||
|
||||
seen_urls = []
|
||||
|
||||
# village voice breaks the article up into multiple pages, so
|
||||
# parse page and grab the print url
|
||||
|
||||
url_regex = re.compile(r'\/content\/printVersion\/\d+',re.I)
|
||||
|
||||
def print_version(self, url):
|
||||
if url in self.seen_urls:
|
||||
return None
|
||||
self.seen_urls.append( url)
|
||||
soup = self.index_to_soup(url)
|
||||
atag = soup.find('a',attrs={'href':self.url_regex})
|
||||
if atag is None:
|
||||
self.log('Warning: no print url found for '+url)
|
||||
else:
|
||||
m = self.url_regex.search(atag['href'])
|
||||
if m:
|
||||
url = 'http://www.villagevoice.com'+m.group(0)
|
||||
return url
|
12
recipes/wired_it.recipe
Normal file
12
recipes/wired_it.recipe
Normal file
@ -0,0 +1,12 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1325758162(BasicNewsRecipe):
|
||||
title = u'Wired'
|
||||
language = 'it'
|
||||
oldest_article = 7
|
||||
max_articles_per_feed = 100
|
||||
auto_cleanup = True
|
||||
remove_tags_after = [dict(name='div', attrs={'class':'article_content'})]
|
||||
feeds = [(u'Wired', u'http://www.wired.it/rss.xml')]
|
||||
__author__ = 'faber1971'
|
||||
description = 'An American magazine that reports on how new technology affects culture, the economy, and politics'
|
@ -14,7 +14,7 @@
|
||||
|
||||
.button a, .button:visited a {
|
||||
padding: 0.5em;
|
||||
font-size: 1.25em;
|
||||
font-size: larger;
|
||||
border: 1px solid black;
|
||||
text-color: black;
|
||||
text-decoration: none;
|
||||
|
@ -196,7 +196,9 @@ title_series_sorting = 'library_order'
|
||||
# set to 'strictly_alphabetic', the series will be sent without change.
|
||||
# For example, if the tweak is set to library_order, "The Lord of the Rings"
|
||||
# will become "Lord of the Rings, The". If the tweak is set to
|
||||
# strictly_alphabetic, it would remain "The Lord of the Rings".
|
||||
# strictly_alphabetic, it would remain "The Lord of the Rings". Note that the
|
||||
# formatter function raw_field will return the base value for title and
|
||||
# series regardless of the setting of this tweak.
|
||||
save_template_title_series_sorting = 'library_order'
|
||||
|
||||
#: Set the list of words considered to be "articles" for sort strings
|
||||
@ -291,7 +293,7 @@ auto_connect_to_folder = ''
|
||||
# how the value and category are combined together to make the collection name.
|
||||
# The only two fields available are {category} and {value}. The {value} field is
|
||||
# never empty. The {category} field can be empty. The default is to put the
|
||||
# value first, then the category enclosed in parentheses, it is isn't empty:
|
||||
# value first, then the category enclosed in parentheses, it isn't empty:
|
||||
# '{value} {category:|(|)}'
|
||||
# Examples: The first three examples assume that the second tweak
|
||||
# has not been changed.
|
||||
@ -471,3 +473,14 @@ unified_title_toolbar_on_osx = False
|
||||
# this to False you can prevent calibre from saving the original file.
|
||||
save_original_format = True
|
||||
|
||||
#: Number of recently viewed books to show
|
||||
# Right-clicking the View button shows a list of recently viewed books. Control
|
||||
# how many should be shown, here.
|
||||
gui_view_history_size = 15
|
||||
|
||||
#: When using the 'Tweak Book' action, which format to prefer
|
||||
# When tweaking a book that has multiple formats, calibre picks one
|
||||
# automatically. By default EPUB is preferred to HTMLZ. If you would like to
|
||||
# prefer HTMLZ to EPUB for tweaking, change this to 'htmlz'
|
||||
tweak_book_prefer = 'epub'
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/** @license Hyphenator X.Y.Z - client side hyphenation for webbrowsers
|
||||
* Copyright (C) 2010 Mathias Nater, Zürich (mathias at mnn dot ch)
|
||||
/** @license Hyphenator 4.0.0 - client side hyphenation for webbrowsers
|
||||
* Copyright (C) 2011 Mathias Nater, Zürich (mathias at mnn dot ch)
|
||||
* Project and Source hosted on http://code.google.com/p/hyphenator/
|
||||
*
|
||||
* This JavaScript code is free software: you can redistribute
|
||||
@ -15,6 +15,40 @@
|
||||
* that code without the copy of the GNU GPL normally required by
|
||||
* section 4, provided you include this license notice and a URL
|
||||
* through which recipients can access the Corresponding Source.
|
||||
*
|
||||
*
|
||||
* Hyphenator.js contains code from Bram Steins hypher.js-Project:
|
||||
* https://github.com/bramstein/Hypher
|
||||
*
|
||||
* Code from this project is marked in the source and belongs
|
||||
* to the following license:
|
||||
*
|
||||
* Copyright (c) 2011, Bram Stein
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the author may not be used to endorse or promote products
|
||||
* derived from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
|
||||
* EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
|
||||
* INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
|
||||
* EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
@ -56,6 +90,7 @@ var Hyphenator = (function (window) {
|
||||
*/
|
||||
supportedLang = {
|
||||
'be': 'be.js',
|
||||
'ca': 'ca.js',
|
||||
'cs': 'cs.js',
|
||||
'da': 'da.js',
|
||||
'bn': 'bn.js',
|
||||
@ -80,14 +115,16 @@ var Hyphenator = (function (window) {
|
||||
'lt': 'lt.js',
|
||||
'lv': 'lv.js',
|
||||
'ml': 'ml.js',
|
||||
'no': 'no-nb.js',
|
||||
'no-nb': 'no-nb.js',
|
||||
'nb': 'nb-no.js',
|
||||
'no': 'nb-no.js',
|
||||
'nb-no': 'nb-no.js',
|
||||
'nl': 'nl.js',
|
||||
'or': 'or.js',
|
||||
'pa': 'pa.js',
|
||||
'pl': 'pl.js',
|
||||
'pt': 'pt.js',
|
||||
'ru': 'ru.js',
|
||||
'sk': 'sk.js',
|
||||
'sl': 'sl.js',
|
||||
'sv': 'sv.js',
|
||||
'ta': 'ta.js',
|
||||
@ -235,7 +272,7 @@ var Hyphenator = (function (window) {
|
||||
* @private
|
||||
* @see Hyphenator-hyphenateElement
|
||||
*/
|
||||
dontHyphenate = {'script': true, 'code': true, 'pre': true, 'img': true, 'br': true, 'samp': true, 'kbd': true, 'var': true, 'abbr': true, 'acronym': true, 'sub': true, 'sup': true, 'button': true, 'option': true, 'label': true, 'textarea': true, 'input': true},
|
||||
dontHyphenate = {'script': true, 'code': true, 'pre': true, 'img': true, 'br': true, 'samp': true, 'kbd': true, 'var': true, 'abbr': true, 'acronym': true, 'sub': true, 'sup': true, 'button': true, 'option': true, 'label': true, 'textarea': true, 'input': true, 'math': true, 'svg': true},
|
||||
|
||||
/**
|
||||
* @name Hyphenator-enableCache
|
||||
@ -309,6 +346,86 @@ var Hyphenator = (function (window) {
|
||||
*/
|
||||
displayToggleBox = false,
|
||||
|
||||
/**
|
||||
* @name Hyphenator-css3
|
||||
* @description
|
||||
* A variable to set if css3 hyphenation should be used
|
||||
* @type boolean
|
||||
* @default false
|
||||
* @private
|
||||
* @see Hyphenator.config
|
||||
*/
|
||||
css3 = false,
|
||||
/**
|
||||
* @name Hyphenator-css3_hsupport
|
||||
* @description
|
||||
* A generated object containing information for CSS3-hyphenation support
|
||||
* {
|
||||
* support: boolean,
|
||||
* property: <the property name to access hyphen-settings>,
|
||||
* languages: <an object containing supported languages>
|
||||
* }
|
||||
* @type object
|
||||
* @default undefined
|
||||
* @private
|
||||
* @see Hyphenator-css3_gethsupport
|
||||
*/
|
||||
css3_h9n,
|
||||
/**
|
||||
* @name Hyphenator-css3_gethsupport
|
||||
* @description
|
||||
* This function sets Hyphenator-css3_h9n for the current UA
|
||||
* @type function
|
||||
* @private
|
||||
* @see Hyphenator-css3_h9n
|
||||
*/
|
||||
css3_gethsupport = function () {
|
||||
var s,
|
||||
ua = navigator.userAgent,
|
||||
r = {
|
||||
support: false,
|
||||
property: '',
|
||||
languages: {}
|
||||
};
|
||||
if (window.getComputedStyle) {
|
||||
s = window.getComputedStyle(window.document.getElementsByTagName('body')[0]);
|
||||
} else {
|
||||
//ancient Browser don't support CSS3 anyway
|
||||
css3_h9n = r;
|
||||
return;
|
||||
}
|
||||
if (ua.indexOf('Chrome') !== -1) {
|
||||
//Chrome actually knows -webkit-hyphens but does no hyphenation
|
||||
r.support = false;
|
||||
} else if ((ua.indexOf('Safari') !== -1) && (s['-webkit-hyphens'] !== undefined)) {
|
||||
r.support = true;
|
||||
r.property = '-webkit-hyphens';
|
||||
if (ua.indexOf('Mobile') !== -1) {
|
||||
//iOS only hyphenates in systemlanguage
|
||||
r.languages[navigator.language.split('-')[0]] = true;
|
||||
} else {
|
||||
//Desktop Safari only hyphenates some languages:
|
||||
r.languages = {
|
||||
de: true,
|
||||
en: true,
|
||||
es: true,
|
||||
fr: true,
|
||||
it: true,
|
||||
nl: true,
|
||||
ru: true,
|
||||
zh: true
|
||||
};
|
||||
}
|
||||
} else if ((ua.indexOf('Firefox') !== -1) && (s['MozHyphens'] !== undefined)) {
|
||||
r.support = true;
|
||||
r.property = 'MozHyphens';
|
||||
r.languages = {
|
||||
en: true
|
||||
};
|
||||
}
|
||||
css3_h9n = r;
|
||||
},
|
||||
|
||||
/**
|
||||
* @name Hyphenator-hyphenateClass
|
||||
* @description
|
||||
@ -405,6 +522,7 @@ var Hyphenator = (function (window) {
|
||||
*/
|
||||
defaultLanguage = '',
|
||||
|
||||
|
||||
/**
|
||||
* @name Hyphenator-elements
|
||||
* @description
|
||||
@ -413,7 +531,38 @@ var Hyphenator = (function (window) {
|
||||
* @type {Array}
|
||||
* @private
|
||||
*/
|
||||
elements = [],
|
||||
elements = (function () {
|
||||
var Element = function (element, data) {
|
||||
this.element = element;
|
||||
this.hyphenated = false;
|
||||
this.treated = false; //collected but not hyphenated (dohyphenation is off)
|
||||
this.data = data;
|
||||
},
|
||||
ElementCollection = function () {
|
||||
this.count = 0;
|
||||
this.hyCount = 0;
|
||||
this.list = {};
|
||||
};
|
||||
ElementCollection.prototype = {
|
||||
add: function (el, lang, data) {
|
||||
if (!this.list.hasOwnProperty(lang)) {
|
||||
this.list[lang] = [];
|
||||
}
|
||||
this.list[lang].push(new Element(el, data));
|
||||
this.count += 1;
|
||||
},
|
||||
each: function (fn) {
|
||||
var k;
|
||||
for (k in this.list) {
|
||||
if (this.list.hasOwnProperty(k)) {
|
||||
fn(k, this.list[k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
return new ElementCollection();
|
||||
}()),
|
||||
|
||||
|
||||
/**
|
||||
* @name Hyphenator-exceptions
|
||||
@ -426,15 +575,6 @@ var Hyphenator = (function (window) {
|
||||
*/
|
||||
exceptions = {},
|
||||
|
||||
countObjProps = function (obj) {
|
||||
var k, l = 0;
|
||||
for (k in obj) {
|
||||
if (obj.hasOwnProperty(k)) {
|
||||
l++;
|
||||
}
|
||||
}
|
||||
return l;
|
||||
},
|
||||
/**
|
||||
* @name Hyphenator-docLanguages
|
||||
* @description
|
||||
@ -445,7 +585,6 @@ var Hyphenator = (function (window) {
|
||||
*/
|
||||
docLanguages = {},
|
||||
|
||||
|
||||
/**
|
||||
* @name Hyphenator-state
|
||||
* @description
|
||||
@ -583,6 +722,18 @@ var Hyphenator = (function (window) {
|
||||
*/
|
||||
intermediateState = 'hidden',
|
||||
|
||||
/**
|
||||
* @name Hyphenator-unhide
|
||||
* @description
|
||||
* How hidden elements unhide: either simultaneous (default: 'wait') or progressively.
|
||||
* 'wait' makes Hyphenator.js to wait until all elements are hyphenated (one redraw)
|
||||
* With 'progressiv' Hyphenator.js unhides elements as soon as they are hyphenated.
|
||||
* @see Hyphenator.config
|
||||
* @type {string}
|
||||
* @private
|
||||
*/
|
||||
unhide = 'wait',
|
||||
|
||||
/**
|
||||
* @name Hyphenator-hyphen
|
||||
* @description
|
||||
@ -619,50 +770,6 @@ var Hyphenator = (function (window) {
|
||||
*/
|
||||
safeCopy = true,
|
||||
|
||||
/**
|
||||
* @name Hyphenator-Expando
|
||||
* @description
|
||||
* This custom object stores data for elements: storing data directly in elements
|
||||
* (DomElement.customData = foobar;) isn't a good idea. It would lead to conflicts
|
||||
* in form elements, when the form has a child with name="foobar". Therefore, this
|
||||
* solution follows the approach of jQuery: the data is stored in an object and
|
||||
* referenced by a unique attribute of the element. The attribute has a name that
|
||||
* is built by the prefix "HyphenatorExpando_" and a random number, so if the very
|
||||
* very rare case occurs, that there's already an attribute with the same name, a
|
||||
* simple reload is enough to make it function.
|
||||
* @private
|
||||
*/
|
||||
Expando = (function () {
|
||||
var container = {},
|
||||
name = "HyphenatorExpando_" + Math.random(),
|
||||
uuid = 0;
|
||||
return {
|
||||
getDataForElem : function (elem) {
|
||||
return container[elem[name].id];
|
||||
},
|
||||
setDataForElem : function (elem, data) {
|
||||
var id;
|
||||
if (elem[name] && elem[name].id !== '') {
|
||||
id = elem[name].id;
|
||||
} else {
|
||||
id = uuid++;
|
||||
elem[name] = {'id': id}; //object needed, otherways it is reflected in HTML in IE
|
||||
}
|
||||
container[id] = data;
|
||||
},
|
||||
appendDataForElem : function (elem, data) {
|
||||
var k;
|
||||
for (k in data) {
|
||||
if (data.hasOwnProperty(k)) {
|
||||
container[elem[name].id][k] = data[k];
|
||||
}
|
||||
}
|
||||
},
|
||||
delDataOfElem : function (elem) {
|
||||
delete container[elem[name]];
|
||||
}
|
||||
};
|
||||
}()),
|
||||
|
||||
/*
|
||||
* runOnContentLoaded is based od jQuery.bindReady()
|
||||
@ -915,36 +1022,41 @@ var Hyphenator = (function (window) {
|
||||
var elToProcess, tmp, i = 0,
|
||||
process = function (el, hide, lang) {
|
||||
var n, i = 0, hyphenatorSettings = {};
|
||||
if (hide && intermediateState === 'hidden') {
|
||||
if (!!el.getAttribute('style')) {
|
||||
hyphenatorSettings.hasOwnStyle = true;
|
||||
} else {
|
||||
hyphenatorSettings.hasOwnStyle = false;
|
||||
}
|
||||
hyphenatorSettings.isHidden = true;
|
||||
el.style.visibility = 'hidden';
|
||||
}
|
||||
if (el.lang && typeof(el.lang) === 'string') {
|
||||
hyphenatorSettings.language = el.lang.toLowerCase(); //copy attribute-lang to internal lang
|
||||
} else if (lang) {
|
||||
hyphenatorSettings.language = lang.toLowerCase();
|
||||
} else {
|
||||
hyphenatorSettings.language = getLang(el, true);
|
||||
}
|
||||
lang = hyphenatorSettings.language;
|
||||
if (supportedLang[lang]) {
|
||||
docLanguages[lang] = true;
|
||||
} else {
|
||||
if (supportedLang.hasOwnProperty(lang.split('-')[0])) { //try subtag
|
||||
lang = lang.split('-')[0];
|
||||
hyphenatorSettings.language = lang;
|
||||
} else if (!isBookmarklet) {
|
||||
onError(new Error('Language ' + lang + ' is not yet supported.'));
|
||||
}
|
||||
}
|
||||
Expando.setDataForElem(el, hyphenatorSettings);
|
||||
|
||||
elements.push(el);
|
||||
if (el.lang && typeof(el.lang) === 'string') {
|
||||
lang = el.lang.toLowerCase(); //copy attribute-lang to internal lang
|
||||
} else if (lang) {
|
||||
lang = lang.toLowerCase();
|
||||
} else {
|
||||
lang = getLang(el, true);
|
||||
}
|
||||
|
||||
//if css3-hyphenation is supported: use it!
|
||||
if (css3 && css3_h9n.support && !!css3_h9n.languages[lang]) {
|
||||
el.style[css3_h9n.property] = "auto";
|
||||
el.style['-webkit-locale'] = "'" + lang + "'";
|
||||
} else {
|
||||
if (intermediateState === 'hidden') {
|
||||
if (!!el.getAttribute('style')) {
|
||||
hyphenatorSettings.hasOwnStyle = true;
|
||||
} else {
|
||||
hyphenatorSettings.hasOwnStyle = false;
|
||||
}
|
||||
hyphenatorSettings.isHidden = true;
|
||||
el.style.visibility = 'hidden';
|
||||
}
|
||||
if (supportedLang[lang]) {
|
||||
docLanguages[lang] = true;
|
||||
} else {
|
||||
if (supportedLang.hasOwnProperty(lang.split('-')[0])) { //try subtag
|
||||
lang = lang.split('-')[0];
|
||||
hyphenatorSettings.language = lang;
|
||||
} else if (!isBookmarklet) {
|
||||
onError(new Error('Language ' + lang + ' is not yet supported.'));
|
||||
}
|
||||
}
|
||||
elements.add(el, lang, hyphenatorSettings);
|
||||
}
|
||||
while (!!(n = el.childNodes[i++])) {
|
||||
if (n.nodeType === 1 && !dontHyphenate[n.nodeName.toLowerCase()] &&
|
||||
n.className.indexOf(dontHyphenateClass) === -1 && !(n in elToProcess)) {
|
||||
@ -952,6 +1064,9 @@ var Hyphenator = (function (window) {
|
||||
}
|
||||
}
|
||||
};
|
||||
if (css3) {
|
||||
css3_gethsupport();
|
||||
}
|
||||
if (isBookmarklet) {
|
||||
elToProcess = contextWindow.document.getElementsByTagName('body')[0];
|
||||
process(elToProcess, false, mainLanguage);
|
||||
@ -962,42 +1077,107 @@ var Hyphenator = (function (window) {
|
||||
process(tmp, true, '');
|
||||
}
|
||||
}
|
||||
if (!Hyphenator.languages.hasOwnProperty(mainLanguage)) {
|
||||
docLanguages[mainLanguage] = true;
|
||||
} else if (!Hyphenator.languages[mainLanguage].prepared) {
|
||||
docLanguages[mainLanguage] = true;
|
||||
}
|
||||
if (elements.length > 0) {
|
||||
Expando.appendDataForElem(elements[elements.length - 1], {isLast : true});
|
||||
if (elements.count === 0) {
|
||||
//nothing to hyphenate or all hyphenated b css3
|
||||
state = 3;
|
||||
onHyphenationDone();
|
||||
}
|
||||
},
|
||||
|
||||
|
||||
/**
|
||||
* @name Hyphenator-convertPatterns
|
||||
* @name Hyphenator-createTrie
|
||||
* @description
|
||||
* Converts the patterns from string '_a6' to object '_a':'_a6'.
|
||||
* The result is stored in the {@link Hyphenator-patterns}-object.
|
||||
* converts patterns of the given language in a trie
|
||||
* @private
|
||||
* @param {string} lang the language whose patterns shall be converted
|
||||
*/
|
||||
convertPatterns = function (lang) {
|
||||
var plen, anfang, ende, pats, pat, key, tmp = {};
|
||||
pats = Hyphenator.languages[lang].patterns;
|
||||
for (plen in pats) {
|
||||
if (pats.hasOwnProperty(plen)) {
|
||||
plen = parseInt(plen, 10);
|
||||
anfang = 0;
|
||||
ende = plen;
|
||||
while (!!(pat = pats[plen].substring(anfang, ende))) {
|
||||
key = pat.replace(/\d/g, '');
|
||||
tmp[key] = pat;
|
||||
anfang = ende;
|
||||
ende += plen;
|
||||
/** @license BSD licenced code
|
||||
* The following code is based on code from hypher.js and adapted for Hyphenator.js
|
||||
* Copyright (c) 2011, Bram Stein
|
||||
*/
|
||||
var size = 0,
|
||||
tree = {
|
||||
tpoints: []
|
||||
},
|
||||
patterns, pattern, i, j, k,
|
||||
patternObject = Hyphenator.languages[lang].patterns,
|
||||
c, chars, points, t, p, codePoint,
|
||||
getPoints = (function () {
|
||||
//IE<9 doesn't act like other browsers
|
||||
if ('in3se'.split(/\D/).length === 1) {
|
||||
return function (pattern) {
|
||||
var chars = pattern.split(''), c, i, r = [],
|
||||
numb3rs = {'0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, '9': 9}, lastWasNum = false;
|
||||
i = 0;
|
||||
while (!!(c = chars[i])) {
|
||||
if (numb3rs.hasOwnProperty(c)) {
|
||||
r.push(c);
|
||||
i += 2;
|
||||
lastWasNum = true;
|
||||
} else {
|
||||
r.push('');
|
||||
i += 1;
|
||||
lastWasNum = false;
|
||||
}
|
||||
}
|
||||
if (!lastWasNum) {
|
||||
r.push('');
|
||||
}
|
||||
return r;
|
||||
};
|
||||
} else {
|
||||
return function (pattern) {
|
||||
return pattern.split(/\D/);
|
||||
};
|
||||
}
|
||||
}());
|
||||
|
||||
for (size in patternObject) {
|
||||
if (patternObject.hasOwnProperty(size)) {
|
||||
patterns = patternObject[size].match(new RegExp('.{1,' + (+size) + '}', 'g'));
|
||||
i = 0;
|
||||
while (!!(pattern = patterns[i++])) {
|
||||
chars = pattern.replace(/[\d]/g, '').split('');
|
||||
points = getPoints(pattern);
|
||||
t = tree;
|
||||
|
||||
j = 0;
|
||||
while (!!(c = chars[j++])) {
|
||||
codePoint = c.charCodeAt(0);
|
||||
|
||||
if (!t[codePoint]) {
|
||||
t[codePoint] = {};
|
||||
}
|
||||
t = t[codePoint];
|
||||
}
|
||||
|
||||
t.tpoints = [];
|
||||
for (k = 0; k < points.length; k++) {
|
||||
p = points[k];
|
||||
t.tpoints.push((p == "") ? 0 : p);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Hyphenator.languages[lang].patterns = tmp;
|
||||
Hyphenator.languages[lang].patternsConverted = true;
|
||||
Hyphenator.languages[lang].patterns = tree;
|
||||
/**
|
||||
* end of BSD licenced code from hypher.js
|
||||
*/
|
||||
},
|
||||
|
||||
recreatePattern = function (pattern, nodePoints) {
|
||||
var r = [], c = pattern.split(''), i;
|
||||
for (i = 0; i < nodePoints.length; i++) {
|
||||
if (nodePoints[i] !== 0) {
|
||||
r.push(nodePoints[i]);
|
||||
}
|
||||
if (c[i]) {
|
||||
r.push(c[i]);
|
||||
}
|
||||
}
|
||||
return r.join('');
|
||||
},
|
||||
|
||||
/**
|
||||
@ -1112,7 +1292,7 @@ var Hyphenator = (function (window) {
|
||||
lo.exceptions = {};
|
||||
}
|
||||
convertPatterns(lang);
|
||||
wrd = '[\\w' + lo.specialChars + '@' + String.fromCharCode(173) + '-]{' + min + ',}';
|
||||
wrd = '[\\w' + lo.specialChars + '@' + String.fromCharCode(173) + String.fromCharCode(8204) + '-]{' + min + ',}';
|
||||
lo.genRegExp = new RegExp('(' + url + ')|(' + mail + ')|(' + wrd + ')', 'gi');
|
||||
lo.prepared = true;
|
||||
}
|
||||
@ -1136,7 +1316,6 @@ var Hyphenator = (function (window) {
|
||||
* by repeatedly checking Hyphenator.languages. If a patterfile is loaded the patterns are
|
||||
* converted to their object style and the lang-object extended.
|
||||
* Finally the callback is called.
|
||||
* @param {function()} callback to call, when all patterns are loaded
|
||||
* @private
|
||||
*/
|
||||
prepare = function (callback) {
|
||||
@ -1148,7 +1327,7 @@ var Hyphenator = (function (window) {
|
||||
}
|
||||
}
|
||||
state = 2;
|
||||
callback();
|
||||
callback('*');
|
||||
return;
|
||||
}
|
||||
// get all languages that are used and preload the patterns
|
||||
@ -1176,23 +1355,18 @@ var Hyphenator = (function (window) {
|
||||
delete exceptions[lang];
|
||||
}
|
||||
//Replace genRegExp since it may have been changed:
|
||||
tmp1 = '[\\w' + Hyphenator.languages[lang].specialChars + '@' + String.fromCharCode(173) + '-]{' + min + ',}';
|
||||
tmp1 = '[\\w' + Hyphenator.languages[lang].specialChars + '@' + String.fromCharCode(173) + String.fromCharCode(8204) + '-]{' + min + ',}';
|
||||
Hyphenator.languages[lang].genRegExp = new RegExp('(' + url + ')|(' + mail + ')|(' + tmp1 + ')', 'gi');
|
||||
|
||||
delete docLanguages[lang];
|
||||
callback(lang);
|
||||
continue;
|
||||
} else {
|
||||
loadPatterns(lang);
|
||||
}
|
||||
}
|
||||
}
|
||||
// if all patterns are loaded from storage: callback
|
||||
if (countObjProps(docLanguages) === 0) {
|
||||
state = 2;
|
||||
callback();
|
||||
return;
|
||||
}
|
||||
// else async wait until patterns are loaded, then callback
|
||||
// else async wait until patterns are loaded, then hyphenate
|
||||
interval = window.setInterval(function () {
|
||||
var finishedLoading = true, lang;
|
||||
for (lang in docLanguages) {
|
||||
@ -1202,6 +1376,7 @@ var Hyphenator = (function (window) {
|
||||
delete docLanguages[lang];
|
||||
//do conversion while other patterns are loading:
|
||||
prepareLanguagesObj(lang);
|
||||
callback(lang);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1209,7 +1384,6 @@ var Hyphenator = (function (window) {
|
||||
//console.log('callig callback for ' + contextWindow.location.href);
|
||||
window.clearInterval(interval);
|
||||
state = 2;
|
||||
callback();
|
||||
}
|
||||
}, 100);
|
||||
},
|
||||
@ -1218,7 +1392,6 @@ var Hyphenator = (function (window) {
|
||||
* @name Hyphenator-switchToggleBox
|
||||
* @description
|
||||
* Creates or hides the toggleBox: a small button to turn off/on hyphenation on a page.
|
||||
* @param {boolean} s true when hyphenation is on, false when it's off
|
||||
* @see Hyphenator.config
|
||||
* @private
|
||||
*/
|
||||
@ -1255,6 +1428,7 @@ var Hyphenator = (function (window) {
|
||||
}
|
||||
},
|
||||
|
||||
|
||||
/**
|
||||
* @name Hyphenator-hyphenateWord
|
||||
* @description
|
||||
@ -1269,8 +1443,10 @@ var Hyphenator = (function (window) {
|
||||
* @public
|
||||
*/
|
||||
hyphenateWord = function (lang, word) {
|
||||
var lo = Hyphenator.languages[lang],
|
||||
parts, i, l, w, wl, s, hypos, p, maxwins, win, pat = false, patk, c, t, n, numb3rs, inserted, hyphenatedword, val;
|
||||
var lo = Hyphenator.languages[lang], parts, l, subst,
|
||||
w, characters, originalCharacters, wordLength, i, j, k, node, points = [],
|
||||
characterPoints = [], nodePoints, nodePointsLength, m = Math.max, trie,
|
||||
result = [''], pattern;
|
||||
if (word === '') {
|
||||
return '';
|
||||
}
|
||||
@ -1292,62 +1468,66 @@ var Hyphenator = (function (window) {
|
||||
}
|
||||
return parts.join('-');
|
||||
}
|
||||
//finally the core hyphenation algorithm
|
||||
w = '_' + word + '_';
|
||||
wl = w.length;
|
||||
s = w.split('');
|
||||
if (word.indexOf("'") !== -1) {
|
||||
w = w.toLowerCase().replace("'", "’"); //replace APOSTROPHE with RIGHT SINGLE QUOTATION MARK (since the latter is used in the patterns)
|
||||
} else {
|
||||
w = w.toLowerCase();
|
||||
w = word = '_' + word + '_';
|
||||
if (!!lo.charSubstitution) {
|
||||
for (subst in lo.charSubstitution) {
|
||||
if (lo.charSubstitution.hasOwnProperty(subst)) {
|
||||
w = w.replace(new RegExp(subst, 'g'), lo.charSubstitution[subst]);
|
||||
}
|
||||
}
|
||||
}
|
||||
hypos = [];
|
||||
numb3rs = {'0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, '9': 9}; //check for member is faster then isFinite()
|
||||
n = wl - lo.shortestPattern;
|
||||
for (p = 0; p <= n; p++) {
|
||||
maxwins = Math.min((wl - p), lo.longestPattern);
|
||||
for (win = lo.shortestPattern; win <= maxwins; win++) {
|
||||
if (lo.patterns.hasOwnProperty(patk = w.substring(p, p + win))) {
|
||||
pat = lo.patterns[patk];
|
||||
if (enableReducedPatternSet && (typeof pat === 'string')) {
|
||||
lo.redPatSet[patk] = pat;
|
||||
if (word.indexOf("'") !== -1) {
|
||||
w = w.replace("'", "’"); //replace APOSTROPHE with RIGHT SINGLE QUOTATION MARK (since the latter is used in the patterns)
|
||||
}
|
||||
/** @license BSD licenced code
|
||||
* The following code is based on code from hypher.js
|
||||
* Copyright (c) 2011, Bram Stein
|
||||
*/
|
||||
characters = w.toLowerCase().split('');
|
||||
originalCharacters = word.split('');
|
||||
wordLength = characters.length;
|
||||
trie = lo.patterns;
|
||||
for (i = 0; i < wordLength; i += 1) {
|
||||
points[i] = 0;
|
||||
characterPoints[i] = characters[i].charCodeAt(0);
|
||||
}
|
||||
for (i = 0; i < wordLength; i += 1) {
|
||||
pattern = '';
|
||||
node = trie;
|
||||
for (j = i; j < wordLength; j += 1) {
|
||||
node = node[characterPoints[j]];
|
||||
if (node) {
|
||||
if (enableReducedPatternSet) {
|
||||
pattern += String.fromCharCode(characterPoints[j]);
|
||||
}
|
||||
if (typeof pat === 'string') {
|
||||
//convert from string 'a5b' to array [1,5] (pos,value)
|
||||
t = 0;
|
||||
val = [];
|
||||
for (i = 0; i < pat.length; i++) {
|
||||
if (!!(c = numb3rs[pat.charAt(i)])) {
|
||||
val.push(i - t, c);
|
||||
t++;
|
||||
nodePoints = node.tpoints;
|
||||
if (nodePoints) {
|
||||
if (enableReducedPatternSet) {
|
||||
if (!lo.redPatSet) {
|
||||
lo.redPatSet = {};
|
||||
}
|
||||
lo.redPatSet[pattern] = recreatePattern(pattern, nodePoints);
|
||||
}
|
||||
for (k = 0, nodePointsLength = nodePoints.length; k < nodePointsLength; k += 1) {
|
||||
points[i + k] = m(points[i + k], nodePoints[k]);
|
||||
}
|
||||
pat = lo.patterns[patk] = val;
|
||||
}
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
for (i = 0; i < pat.length; i++) {
|
||||
c = p - 1 + pat[i];
|
||||
if (!hypos[c] || hypos[c] < pat[i + 1]) {
|
||||
hypos[c] = pat[i + 1];
|
||||
}
|
||||
i++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
inserted = 0;
|
||||
for (i = lo.leftmin; i <= (word.length - lo.rightmin); i++) {
|
||||
if (!!(hypos[i] & 1)) {
|
||||
s.splice(i + inserted + 1, 0, hyphen);
|
||||
inserted++;
|
||||
for (i = 1; i < wordLength - 1; i += 1) {
|
||||
if (i > lo.leftmin && i < (wordLength - lo.rightmin) && points[i] % 2) {
|
||||
result.push(originalCharacters[i]);
|
||||
} else {
|
||||
result[result.length - 1] += originalCharacters[i];
|
||||
}
|
||||
}
|
||||
hyphenatedword = s.slice(1, -1).join('');
|
||||
if (enableCache) {
|
||||
lo.cache[word] = hyphenatedword;
|
||||
}
|
||||
return hyphenatedword;
|
||||
return result.join(hyphen);
|
||||
/**
|
||||
* end of BSD licenced code from hypher.js
|
||||
*/
|
||||
},
|
||||
|
||||
/**
|
||||
@ -1425,13 +1605,18 @@ var Hyphenator = (function (window) {
|
||||
}
|
||||
//create a hidden shadow element
|
||||
shadow = currDoc.createElement('div');
|
||||
shadow.style.overflow = 'hidden';
|
||||
shadow.style.position = 'absolute';
|
||||
shadow.style.top = '-5000px';
|
||||
shadow.style.height = '1px';
|
||||
//Moving the element out of the screen doesn't work for IE9 (https://connect.microsoft.com/IE/feedback/details/663981/)
|
||||
//shadow.style.overflow = 'hidden';
|
||||
//shadow.style.position = 'absolute';
|
||||
//shadow.style.top = '-5000px';
|
||||
//shadow.style.height = '1px';
|
||||
//doing this instead:
|
||||
shadow.style.color = window.getComputedStyle ? targetWindow.getComputedStyle(body).backgroundColor : '#FFFFFF';
|
||||
shadow.style.fontSize = '0px';
|
||||
body.appendChild(shadow);
|
||||
if (!!window.getSelection) {
|
||||
//FF3, Webkit
|
||||
//FF3, Webkit, IE9
|
||||
e.stopPropagation();
|
||||
selection = targetWindow.getSelection();
|
||||
range = selection.getRangeAt(0);
|
||||
shadow.appendChild(range.cloneContents());
|
||||
@ -1439,10 +1624,12 @@ var Hyphenator = (function (window) {
|
||||
selection.selectAllChildren(shadow);
|
||||
restore = function () {
|
||||
shadow.parentNode.removeChild(shadow);
|
||||
selection.removeAllRanges(); //IE9 needs that
|
||||
selection.addRange(range);
|
||||
};
|
||||
} else {
|
||||
// IE
|
||||
// IE<9
|
||||
e.cancelBubble = true;
|
||||
selection = targetWindow.document.selection;
|
||||
range = selection.createRange();
|
||||
shadow.innerHTML = range.htmlText;
|
||||
@ -1464,12 +1651,59 @@ var Hyphenator = (function (window) {
|
||||
}
|
||||
el = el || body;
|
||||
if (window.addEventListener) {
|
||||
el.addEventListener("copy", oncopyHandler, false);
|
||||
el.addEventListener("copy", oncopyHandler, true);
|
||||
} else {
|
||||
el.attachEvent("oncopy", oncopyHandler);
|
||||
}
|
||||
},
|
||||
|
||||
/**
|
||||
* @name Hyphenator-unhideElement
|
||||
* @description
|
||||
* Unhides an element and removes the visibility attr if set by hyphenator
|
||||
* @param Object The Element object from ElementCollection
|
||||
* @private
|
||||
*/
|
||||
unhideElement = function (elo) {
|
||||
var el = elo.element,
|
||||
hyphenatorSettings = elo.data;
|
||||
el.style.visibility = 'visible';
|
||||
elo.data.isHidden = false;
|
||||
if (!hyphenatorSettings.hasOwnStyle) {
|
||||
el.setAttribute('style', ''); // without this, removeAttribute doesn't work in Safari (thanks to molily)
|
||||
el.removeAttribute('style');
|
||||
} else {
|
||||
if (el.style.removeProperty) {
|
||||
el.style.removeProperty('visibility');
|
||||
} else if (el.style.removeAttribute) { // IE
|
||||
el.style.removeAttribute('visibility');
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
/**
|
||||
* @name Hyphenator-checkIfAllDone
|
||||
* @description
|
||||
* Checks if all Elements are hyphenated, unhides them and fires onHyphenationDone()
|
||||
* @private
|
||||
*/
|
||||
checkIfAllDone = function () {
|
||||
var allDone = true;
|
||||
elements.each(function (lang, list) {
|
||||
var i, l = list.length;
|
||||
for (i = 0; i < l; i++) {
|
||||
allDone = allDone && list[i].hyphenated;
|
||||
if (intermediateState === 'hidden' && unhide === 'wait') {
|
||||
unhideElement(list[i]);
|
||||
}
|
||||
}
|
||||
});
|
||||
if (allDone) {
|
||||
state = 3;
|
||||
onHyphenationDone();
|
||||
}
|
||||
},
|
||||
|
||||
|
||||
/**
|
||||
* @name Hyphenator-hyphenateElement
|
||||
@ -1480,9 +1714,10 @@ var Hyphenator = (function (window) {
|
||||
* @param {Object} el The element to hyphenate
|
||||
* @private
|
||||
*/
|
||||
hyphenateElement = function (el) {
|
||||
var hyphenatorSettings = Expando.getDataForElem(el),
|
||||
lang = hyphenatorSettings.language, hyphenate, n, i,
|
||||
hyphenateElement = function (lang, elo) {
|
||||
var hyphenatorSettings = elo.data,
|
||||
el = elo.element,
|
||||
hyphenate, n, i,
|
||||
controlOrphans = function (part) {
|
||||
var h, r;
|
||||
switch (hyphen) {
|
||||
@ -1534,48 +1769,47 @@ var Hyphenator = (function (window) {
|
||||
}
|
||||
}
|
||||
}
|
||||
if (hyphenatorSettings.isHidden && intermediateState === 'hidden') {
|
||||
el.style.visibility = 'visible';
|
||||
if (!hyphenatorSettings.hasOwnStyle) {
|
||||
el.setAttribute('style', ''); // without this, removeAttribute doesn't work in Safari (thanks to molily)
|
||||
el.removeAttribute('style');
|
||||
} else {
|
||||
if (el.style.removeProperty) {
|
||||
el.style.removeProperty('visibility');
|
||||
} else if (el.style.removeAttribute) { // IE
|
||||
el.style.removeAttribute('visibility');
|
||||
}
|
||||
}
|
||||
if (hyphenatorSettings.isHidden && intermediateState === 'hidden' && unhide === 'progressive') {
|
||||
unhideElement(elo);
|
||||
}
|
||||
if (hyphenatorSettings.isLast) {
|
||||
state = 3;
|
||||
documentCount--;
|
||||
if (documentCount > (-1000) && documentCount <= 0) {
|
||||
documentCount = (-2000);
|
||||
onHyphenationDone();
|
||||
}
|
||||
elo.hyphenated = true;
|
||||
elements.hyCount += 1;
|
||||
if (elements.count <= elements.hyCount) {
|
||||
checkIfAllDone();
|
||||
}
|
||||
},
|
||||
|
||||
|
||||
/**
|
||||
* @name Hyphenator-hyphenateDocument
|
||||
* @name Hyphenator-hyphenateLanguageElements
|
||||
* @description
|
||||
* Calls hyphenateElement() for all members of elements. This is done with a setTimout
|
||||
* Calls hyphenateElement() for all elements of the specified language.
|
||||
* If the language is '*' then all elements are hyphenated.
|
||||
* This is done with a setTimout
|
||||
* to prevent a "long running Script"-alert when hyphenating large pages.
|
||||
* Therefore a tricky bind()-function was necessary.
|
||||
* @private
|
||||
*/
|
||||
hyphenateDocument = function () {
|
||||
function bind(fun, arg) {
|
||||
hyphenateLanguageElements = function (lang) {
|
||||
function bind(fun, arg1, arg2) {
|
||||
return function () {
|
||||
return fun(arg);
|
||||
return fun(arg1, arg2);
|
||||
};
|
||||
}
|
||||
var i = 0, el;
|
||||
while (!!(el = elements[i++])) {
|
||||
if (el.ownerDocument.location.href === contextWindow.location.href) {
|
||||
window.setTimeout(bind(hyphenateElement, el), 0);
|
||||
var el, i, l;
|
||||
if (lang === '*') {
|
||||
elements.each(function (lang, langels) {
|
||||
var i, l = langels.length;
|
||||
for (i = 0; i < l; i++) {
|
||||
window.setTimeout(bind(hyphenateElement, lang, langels[i]), 0);
|
||||
}
|
||||
});
|
||||
} else {
|
||||
if (elements.list.hasOwnProperty(lang)) {
|
||||
l = elements.list[lang].length;
|
||||
for (i = 0; i < l; i++) {
|
||||
window.setTimeout(bind(hyphenateElement, lang, elements.list[lang][i]), 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
@ -1587,10 +1821,13 @@ var Hyphenator = (function (window) {
|
||||
* @private
|
||||
*/
|
||||
removeHyphenationFromDocument = function () {
|
||||
var i = 0, el;
|
||||
while (!!(el = elements[i++])) {
|
||||
removeHyphenationFromElement(el);
|
||||
}
|
||||
elements.each(function (lang, elo) {
|
||||
var i, l = elo.length, el;
|
||||
for (i = 0; i < l; i++) {
|
||||
removeHyphenationFromElement(elo[i].element);
|
||||
elo[i].hyphenated = false;
|
||||
}
|
||||
});
|
||||
state = 4;
|
||||
},
|
||||
|
||||
@ -1687,7 +1924,7 @@ var Hyphenator = (function (window) {
|
||||
* minor release: new languages, improvements
|
||||
* @public
|
||||
*/
|
||||
version: 'X.Y.Z',
|
||||
version: '4.0.0',
|
||||
|
||||
/**
|
||||
* @name Hyphenator.doHyphenation
|
||||
@ -1892,6 +2129,16 @@ var Hyphenator = (function (window) {
|
||||
defaultLanguage = obj[key];
|
||||
}
|
||||
break;
|
||||
case 'useCSS3hyphenation':
|
||||
if (assert('useCSS3hyphenation', 'boolean')) {
|
||||
css3 = obj[key];
|
||||
}
|
||||
break;
|
||||
case 'unhide':
|
||||
if (assert('unhide', 'string')) {
|
||||
unhide = obj[key];
|
||||
}
|
||||
break;
|
||||
default:
|
||||
onError(new Error('Hyphenator.config: property ' + key + ' not known.'));
|
||||
}
|
||||
@ -1923,7 +2170,7 @@ var Hyphenator = (function (window) {
|
||||
autoSetMainLanguage(undefined);
|
||||
gatherDocumentInfos();
|
||||
//console.log('preparing for ' + contextWindow.location.href);
|
||||
prepare(hyphenateDocument);
|
||||
prepare(hyphenateLanguageElements);
|
||||
if (displayToggleBox) {
|
||||
toggleBox();
|
||||
}
|
||||
@ -2022,8 +2269,7 @@ var Hyphenator = (function (window) {
|
||||
if (n.nodeType === 3 && n.data.length >= min) { //type 3 = #text -> hyphenate!
|
||||
n.data = n.data.replace(Hyphenator.languages[lang].genRegExp, hyphenate);
|
||||
} else if (n.nodeType === 1) {
|
||||
// Modified by Kovid to use element lang only if it has been loaded
|
||||
if (n.lang !== '' && Hyphenator.languages.hasOwnProperty(n.lang)) {
|
||||
if (n.lang !== '') {
|
||||
Hyphenator.hyphenate(n, n.lang);
|
||||
} else {
|
||||
Hyphenator.hyphenate(n, lang);
|
||||
@ -2115,7 +2361,7 @@ var Hyphenator = (function (window) {
|
||||
storeConfiguration();
|
||||
toggleBox();
|
||||
} else {
|
||||
hyphenateDocument();
|
||||
hyphenateLanguageElements('*');
|
||||
Hyphenator.doHyphenation = true;
|
||||
storeConfiguration();
|
||||
toggleBox();
|
||||
|
BIN
resources/viewer/hyphenate/patterns.zip
Normal file
BIN
resources/viewer/hyphenate/patterns.zip
Normal file
Binary file not shown.
File diff suppressed because one or more lines are too long
@ -1,13 +0,0 @@
|
||||
// For questions about the Bengali hyphenation patterns
|
||||
// ask Santhosh Thottingal (santhosh dot thottingal at gmail dot com)
|
||||
Hyphenator.languages['bn'] = {
|
||||
leftmin : 2,
|
||||
rightmin : 2,
|
||||
shortestPattern : 1,
|
||||
longestPattern : 1,
|
||||
specialChars : unescape("আঅইঈউঊঋএঐঔকগখঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহিীাুূৃোোৈৌৗ্ঃং%u200D"),
|
||||
patterns : {
|
||||
2 : "অ1আ1ই1ঈ1উ1ঊ1ঋ1এ1ঐ1ঔ1ি1া1ী1ু1ৃ1ে1ো1ৌ1ৗ1্2ঃ1ং11ক1গ1খ1ঘ1ঙ1চ1ছ1জ1ঝ1ঞ1ট1ঠ1ড1ঢ1ণ1ত1থ1দ1ধ1ন1প1ফ1ব1ভ1ম1য1র1ল1শ1ষ1স1হ",
|
||||
3 : "2ঃ12ং1"
|
||||
}
|
||||
};
|
File diff suppressed because one or more lines are too long
@ -1,16 +0,0 @@
|
||||
Hyphenator.languages['da'] = {
|
||||
leftmin : 2,
|
||||
rightmin : 2,
|
||||
shortestPattern : 2,
|
||||
longestPattern : 8,
|
||||
specialChars : "æøå",
|
||||
patterns : {
|
||||
3 : "a3ca1ea3ha3ja5oa5z1ba4bd1be1bib1j1bo4bsb5t3bub5w1by1ce3chck35cy3dad1b1ded1fd1gd3h1did3jd1kd1ld1m3dod1p1dud1v3dye3ee1he5x1faf1bf1d1fef1ff1gf1h1fif1k3fl1fof1p4ft1fuf1v3fy1gag1bg1d1geg3fg1gg1h1gi5gjg3kg1lg1m3gog3p1grg3v1gyi1ai3bi1ci3hi5ii5ji1uj5kj3rk5bk3hk1kk1tl1bl1fl3hl3jl1ll3r4ls1mam1bm3d1mem3fm1gm3h1mim3km1lm1mm1n3mom1r3my3nan1bn1c4nd1nen1f1nin1mn1n1non5pn3r4ns3nyn3zo3ao1co1eo5ho1jo3t3pap3dp3fp3mp3np1t1pup5vqu4r1br1fr1hr1lr1nr3pr1rs1d1ses1fs1msp44tbt1ht1mt1n4tsu1au1eu3iu5qv5hv5jv5kvl41vov5pv5t3vuy3ay3ey5o5bæ3dæ3døe3æe5å3fæ3fø3gæ3gåi3ø3kø3kå1mæ3mø3må3næ5nøo5åpå31sæ1sø5våæ3cæ3eæ5iæ5oø3eå1då1eå5hå3lå3t",
|
||||
4 : "_ae3_om1_po15adg5afgaf3r5afsa4gia4gya5kaa3kea5kraku5a3laa1lea1lial3ka1loa3lua1lya3nu3anva5pea3pia5poa1ra1arba1re5arga1ria3roa3saa3sca1sia3ska3soa1tea1tia1toa5tra1tua5vaa1vebe1k4b1n1br4bs5kb3sob1stby5s4c1c4ch_ci4oda4sd1d4de5ddi1edi5l4d1n4dopd5ovd5rud4smd4sud3tad1tedt5od5trdt5udub5e5ade3afe5age3ake1ale3ane5ape3ate3blebs3e1cie4do3effe3fr3efte3gue3inei5se3jee1kae3kee3kle5kre3kue1kve5kye3lee1lie3loe5lue3lyem1s4enne4noe5nue5ole3ope1ore3ovepi3e1pre3rae1ree1rier1ker3se5rye1tae1tee1tie3tje1toe3tre3tue1tye3ume3un3eure1vae3vee1vifej4f1s4f3taf1tef1tif5toge3sgi4bg5ovgs1ag4segs1pgs1vg3tag1teg1tig5tog3trgt4sg3udgun5g5yd4ha_he5s4hethi4ehi3s4h3thun4hvo4i3dri1eli1eni3erif3ri3gui1kai1keik1li5koi3kuik3vi3liil3ki1loil5ui3mu5infin3si3nui3odi3ogi5oki3olion4i3oti5pii5pri3rei3riir5ti3sci3sii4smis3pi1tai1tei1tii3toi3tri1tui3tyi1vai1vei1vij3agjds1j3lej3lijre5ju3s5kapk5au5kavki3ek1le3kluk4ny5kod1konko3v1kra5kryk1siks3kks1pks5vkt5s3kur1kus3kutk4vok4vu5lab5lam1latl3dr1le_5led3len1ler1les4leuli5ol1kel1kol3kyl5mul3op3lov4l3pl4psl5sjl1tal1tel3tilt3ol3trl3tulu5ll3vel3vimi3kmi4o4mopm1pem3pim3plm1pom3prm5skms3pms5vm3tam3tem3tim3trm1ud1mul4nak1naln3drne5aneo4n4go4n1h4nimni5on1ken1kon3krn3kun5kv4n1ln3sin1tan1ten1tin3ton1trn3tun3ty4n1vo4asod5sof5ro5ino3kao1keo3kuo3lao3leo1lio1loo3luo5ly1omron3kook5o3oro5ovo3piop3lop3rop3s4or_o3rior3kor5oo3sio3soo1teo5unov4s4pec3pen1perpe5spe3u4p5h1pla5pok3potp4rop3skp5sops4pp3stpu5b5py34rafr3dr1relr1guri1er3kar1ker1kir3kurmo4r5muro1bro3pr3orr1sar1sirs4nr3spr5sur3svr1ter1tir3tort3sr5tyr3ud5rutr3var1ver3viry4ss3af1sams3aps1ar1sat4s1bsdy4s4ed4s3h1sig5sis5sit5sius5ju4sk_1skes3kl5skys1les1lislo35slus5lys4myso5k5sol3sons1pls5r4s1s44st_5stj3sto1strs1ud3suls3un3surs3ve3s4y5ta_1tag3tegteo14t1f6t3g3tid4t3k4t1l4t3pt4ra1tryt3sit3st4t1t5turt5ve1typ5udlud5rud3s3udvugs3u5guu5klu1lau1leu5lyu5peup5lu3rau3reu3rous5au3siu5sous5vu1teu1tiu1tout5r5u5vva5d1vedve3s5vet1visv3lev5livls1v5rev3stv5suy5dry3key5kiy3koy3kvy5liy5loy5muyns5y1pey3piy3rey3riy3siy3tiy5vezi5o_så3a3tøa5væe3læe3løe3røe5tæe5tøe1vægiø4g4søg5så3gø1i5tæl3væ5løsm5tån3kæn5tæo5læor3ø5præ5pædr5kær5tær5tør3vær5æl4røn5rør3rådr5års4kå3slås4næ5stø1stås5økti4øt4søt5såt3væu3læy5vææb3læg5aægs5æ5kvæ1reæ3riær5sæ5siæ3soæ3veøde5ø1jeø3keø3leøms5ø1reø3riør5oø1veå3reå5sk",
|
||||
5 : "_an3k_an1s_be1t_her3_ove4_til3_yd5rab5le3abstaf4ria4gefag5inag5si3agtiais5t4alkval5siam4paar5af3a3spa3stea3stia1ta1ato5vba4tibe3robe5rube1s4be1trbi5skbo4grbo3rabo5rece5ro4d3afde5sk3drif3drivd5rosds5ands5ind1skidsu5lds5viea4laed5aredde4ed5raed3re4e1koek5sa3ekspe3ladel3akel3are1lase4lek3elem5elimel5sae4maden5ake4nanen3soer3afe4rage4rake4ref5erhve4ribero5der5over5tre3rumer5unfa4cefags3fejl1fo4rif5tvig3artgi3st4g5omgsha4g5slags3org4strheds3hi4n5ho5koho5vehund3i4bleids5ki3et_ik3reik5riiks5tik4tui3lagil3egil5ejil5elind3tings1in4svions1i5o5ri3plii3stii5suakel5ske3skke5stki3stk5lakko3ra3kortks3ank3stek5stuk4tarkti4ekt5relad3r5lagdld3st4lelele4molfin4l1go1li4galo4du4l5orlses1ls5inl4taf4m5ejm5ingmmen5mo4da4m5ovmse5sms5inm3stemu1lind5sind5sknd5spne4dan3erkn5erlne5slne5stni3stn3ordn1skuns3pon1stan5stint4suob3lio4dinod5riod5uno4geko4gelo4g5oog5reog5sk3optaor1an3ordnord5so3re_o3rego3reko3rero3retor5imor3slor3stpa5ghp5anlpe1rap4lan4ple_4pler4ples4p5p41procp5ulera5is4rarbrd4s34reksre5la5rese4ressre3st5rettri5la4rimor4ing4rinp4rintrk3sorre5sr5skrr5stur5talrt3rer5trir5trosa4ma5s4erse4se4s1g4si4bls5int1skabsk5s44snins4nit5som_3somms5oms5somt4s1op3spec4sper3s4pi1stanst5as3stat1stav1ste_1sted3stel1sten5step3stes5stetst5om1sy1s4tanvteds55tekn5termte5roti4enti3stto5rato1reto1ritor4m4trestro5vts4pats5prts5ult5udsue4t5uk4tauk4tru1reru5skaut5s43varm4v5omyk3liyk4s5yr3eky5t3r_ær5i_øv3rbrød35drøvdstå4er5øn4n5æb4s5ænså4r53værd1værkæ4gekæ4g5rælle4æn1drær4maær4moæ3steøn3støn4t3ørne3års5t",
|
||||
6 : "_be5la_bi4tr_der3i_ne4t5ade5la5a4f1l3analyan4k5ra4t5ind5antade4rig4dretteddel5ed3rined4stre4j5el3eksemer5egeetek4sfor1enger3ini4l5idinter1i1sterit5re_jek4to4j5en_ke4t5ak4terhla4g3rlfind54l5ins4l3intmi5stynemen4nta4lent4s5toi6s5eo3re3so4r5in4po3rer4d5arr5enssre5spore5s4urro4n5r4sk5vr4telir4t5orrt5ratrun4da5s4tam5stemo3ster_tands3tede4ltli4s5uge4riu4r3egve4l5eve4reg3træk_ær4g5r",
|
||||
7 : "_hoved3bu4s5tr4de4lemder5eri5d4reveem4p5lee4v3erfjde4rerjlmeld5l4t5erfntiali43orientringse43sprog_vi4l3infø4r5en",
|
||||
8 : "_diagno54g5endengsde4leng5s4tidejlmel4dinement5eringso4rtialis5t",
|
||||
9 : "ldiagnos5lingeniø4"
|
||||
}
|
||||
};
|
File diff suppressed because one or more lines are too long
@ -1,20 +0,0 @@
|
||||
// Hyphenation patterns for Modern Monotonic Greek.
|
||||
// Created by Dimitrios Filippou with some ideas borrowed from
|
||||
// Yannis Haralambous, Kostis Dryllerakis and Claudio Beccari.
|
||||
// From http://tug.org/svn/texhyphen/branches/ptex/hyph-utf8/tex/generic/hyph-utf8/patterns/tex/hyph-el-monoton.tex
|
||||
// Converted by Pablo Rodríguez (hyphenator at pragmata dot tk)
|
||||
Hyphenator.languages['el-monoton'] = Hyphenator.languages['el'] = {
|
||||
leftmin : 2,
|
||||
rightmin : 2,
|
||||
shortestPattern : 1,
|
||||
longestPattern : 7,
|
||||
specialChars : "αεηιουωϊϋάέήίόύώΐΰίάύέήόώβγκδζθλμπντξρσϲςφχψ'ʼ᾿’᾽",
|
||||
patterns : {
|
||||
2 : "α1ε1η1ι1ο1υ1ω1ϊ1ϋ1ά1έ1ή1ί1ό1ύ1ώ1ΐ1ΰ14'4ʼ4᾿",
|
||||
3 : "α2ια2ία2ίά2ιά2ιά2ϊά2ϊα2υα2ύα2ύά3υά3υε2ιε2ίε2ίέ2ιέ2ιέ2ϊέ2ϊε2υε2ύε2ύέ3υέ3υη2υη2ύη2ύή3υή3υο2ιο2ίο2ίό2ιό2ιό2ϊό2ϊο2υο2ύο2ύό3υό3υυ2ιυ2ίυ2ίύ3ιύ3ια2ηα2ϊα2ϋε2ϊε2ϋο2ηο2ϊι2αι2άι2άι2ει2έι2έι2οι2όι2όι2ωι2ώι2ώ_ι3_ί3_ί3η2αη2άη2άη2εη2έη2έη2οη2όη2όη2ωη2ώη2ώ_η3_ή3_ή3υ2αυ2άυ2άυ2ου2όυ2όυ2ωυ2ώυ2ώ_υ3_ύ3_ύ34β_4γ_4δ_4ζ_4θ_4κ_4λ_4μ_4ν_4ξ_4π_4ρ_4σ_4ϲ_4ς_4τ_4φ_4χ_4ψ_4β'4βʼ4β᾿4γ'4γʼ4γ᾿4δ'4δʼ4δ᾿4ζ'4ζʼ4ζ᾿4θ'4θʼ4θ᾿4κ'4κʼ4κ᾿4λ'4λʼ4λ᾿4μ'4μʼ4μ᾿4ν'4νʼ4ν᾿4ξ'4ξʼ4ξ᾿4π'4πʼ4π᾿4ρ'4ρʼ4ρ᾿4σ'4σʼ4σ᾿4ϲ'4ϲʼ4ϲ᾿4τ'4τʼ4τ᾿4φ'4φʼ4φ᾿4χ'4χʼ4χ᾿4ψ'4ψʼ4ψ᾿_β4_γ4_δ4_ζ4_θ4_κ4_λ4_μ4_ν4_ξ4_π4_ρ4_σ4_ϲ4_τ4_φ4_χ4_ψ4",
|
||||
4 : "ά3η_ά3η_ά3ι_ά3ι_ο2ειό3η_ό3η_ό3ι_ό3ι_4γκ_4μπ_4ντ_4τζ_4τσ_4τϲ_4τς_4μπ'4μπʼ4μπ᾿4ντ'4ντ’4ντ᾿4τζ'4τζʼ4τζ᾿4τσ'4τσʼ4τσ᾽4τϲ'4τϲʼ4τϲ᾿4β1β4γ1γ4δ1δ4ζ1ζ4θ1θ4κ1κ4λ1λ4μ1μ4ν1ν4π1π4ρ1ρ4σ1σ4ϲ1ϲ4τ1τ4φ1φ4χ1χ4ψ1ψ4β1ζ4β1θ4β1κ4β1μ4β1ν4β1ξ4β1π4β1σ4β1ϲ4β1τ4β1φ4β1χ4β1ψ4γ1β4γ1ζ4γ1θ4γ1μ4γ1ξ4γ1π4γ1σ4γ1ϲ4γ1τ4γ1φ4γ1χ4γ1ψ4δ1β4δ1γ4δ1ζ4δ1θ4δ1κ4δ1λ4δ1ξ4δ1π4δ1σ4δ1ϲ4δ1τ4δ1φ4δ1χ4δ1ψ4ζ1β4ζ1γ4ζ1δ4ζ1θ4ζ1κ4ζ1λ4ζ1μτζ2μ4ζ1ν4ζ1ξ4ζ1π4ζ1ρ4ζ1σ4ζ1ϲ4ζ1τ4ζ1φ4ζ1χ4ζ1ψ4θ1β4θ1γ4θ1δ4θ1ζ4θ1κ4θ1μσθ2μϲθ2μ4θ1ξ4θ1π4θ1σ4θ1ϲ4θ1τ4θ1φ4θ1χ4θ1ψ4κ1β4κ1γ4κ1δ4κ1ζ4κ1θ4κ1μ4κ1ξ4κ1π4κ1σ4κ1ϲ4κ1φ4κ1χ4κ1ψ4λ1β4λ1γ4λ1δ4λ1ζ4λ1θ4λ1κ4λ1μ4λ1ν4λ1ξ4λ1π4λ1ρ4λ1σ4λ1ϲ4λ1τ4λ1φ4λ1χ4λ1ψ4μ1β4μ1γ4μ1δ4μ1ζ4μ1θ4μ1κ4μ1λ4μ1ξ4μ1ρ4μ1σ4μ1ϲ4μ1τ4μ1φ4μ1χ4μ1ψ4ν1β4ν1γ4ν1δ4ν1ζ4ν1θ4ν1κ4ν1λ4ν1μ4ν1ξ4ν1π4ν1ρ4ν1σ4ν1ϲ4ν1φ4ν1χ4ν1ψ4ξ1β4ξ1γ4ξ1δ4ξ1ζ4ξ1θ4ξ1κ4ξ1λ4ξ1μ4ξ1ν4ξ1π4ξ1ρ4ξ1σ4ξ1ϲ4ξ1τ4ξ1φ4ξ1χ4ξ1ψ4π1β4π1γ4π1δ4π1ζ4π1θ4π1κ4π1μ4π1ξ4π1σ4π1ϲ4π1φ4π1χ4π1ψ4ρ1β4ρ1γ4ρ1δ4ρ1ζ4ρ1θ4ρ1κ4ρ1λ4ρ1μ4ρ1ν4ρ1ξ4ρ1π4ρ1σ4ρ1ϲ4ρ1τ4ρ1φ4ρ1χ4ρ1ψ4σ1δ4ϲ1δ4σ1ζ4ϲ1ζ4σ1ν4ϲ1ν4σ1ξ4ϲ1ξ4σ1ρ4ϲ1ρ4σ1ψ4ϲ1ψ4τ1β4τ1γ4τ1δ4τ1θ4τ1κ4τ1ν4τ1ξ4τ1π4τ1φστ2φϲτ2φ4τ1χ4τ1ψ4φ1β4φ1γ4φ1δ4φ1ζ4φ1κ4φ1μ4φ1ν4φ1ξ4φ1π4φ1σ4φ1ϲ4φ1χ4φ1ψ4χ1β4χ1γ4χ1δ4χ1ζ4χ1κ4χ1μ4χ1ξ4χ1π4χ1σ4χ1ϲ4χ1φ4χ1ψ4ψ1β4ψ1γ4ψ1δ4ψ1ζ4ψ1θ4ψ1κ4ψ1λ4ψ1μ4ψ1ν4ψ1ξ4ψ1π4ψ1ρ4ψ1σ4ψ1ϲ4ψ1τ4ψ1φ4ψ1χ4βρ_4γλ_4κλ_4κτ_6κς_6κϲ_4κσ_4λς_4λϲ_4λσ_4μς_4μϲ_4μσ_4νς_4νϲ_4νσ_4ρς_4ρϲ_4ρσ_4σκ_4ϲκ_4στ_4ϲτ_4τλ_4τρ_4φτ_",
|
||||
5 : "ο3ϊ3όο3ϊ3ό4γ1κτ4μ1πτ4ν1τζ4ν1τσ4ν1τϲ4γκς_4γκϲ_4γκσ_4μπλ_4μπν_4μπρ_4ντς_4ντϲ_4ντσ_",
|
||||
6 : "4ρ5γ2μ4ρ5θ2μ4λ5κ2μ4ρ5κ2μ4ν5κ2φ4γ5ξ2τ4ρ5ξ2τ4ρ5φ2ν4ρ5χ2μ4μ5ψ2τ4γ5κ2φ4γκ1ντ4γκ1τζ4γκ1τσ4γκ1τϲ4μπ1ντ4μπ1τζ4μπ1τσ4μπ1τϲ4ντ1μπ4τσ1γκ4τϲ1γκ4τσ1μπ4τϲ1μπ4τσ1ντ4τϲ1ντ",
|
||||
10 : "4χτ_4γκ1μπ"
|
||||
}
|
||||
};
|
@ -1,26 +0,0 @@
|
||||
// Hyphenation patterns for Modern Polytonic Greek.
|
||||
// Created by Dimitrios Filippou with some ideas borrowed from
|
||||
// Yannis Haralambous, Kostis Dryllerakis and Claudio Beccari.
|
||||
// From http://tug.org/svn/texhyphen/branches/ptex/hyph-utf8/tex/generic/hyph-utf8/patterns/tex/hyph-el-polyton.tex
|
||||
// Converted by Pablo Rodríguez (hyphenator at pragmata dot tk)
|
||||
Hyphenator.languages['el-polyton'] = {
|
||||
leftmin : 2,
|
||||
rightmin : 2,
|
||||
shortestPattern : 1,
|
||||
longestPattern : 11,
|
||||
specialChars : "αεηιουωϊϋἀἁἂἃἄἅἆἇἐἑἒἓἔἕἠἡἢἣἤἥἦἧἰἱἲἳἴἵἶἷὀὁὂὃὄὅὐὑὒὓὔὕὖὗὠὡὢὣὤὥὦὧὰὲὴὶὸὺὼᾀᾁᾂᾃᾄᾅᾆᾇᾐᾑᾒᾓᾔᾕᾖᾗᾠᾡᾢᾣᾤᾥᾦᾧᾲᾳᾴᾶᾷῂῃῄῆῇῒῖῗῢῦῧῲῳῴῶῷάέήίόύώΐΰάέήίόύώΐΰβγκδζθλμπντξρσϲςφχψ'ʼ᾿’᾽ῤῥ",
|
||||
patterns : {
|
||||
2 : "α1ε1η1ι1ο1υ1ω1ϊ1ϋ1ἀ1ἁ1ἂ1ἃ1ἄ1ἅ1ἆ1ἇ1ἐ1ἑ1ἒ1ἓ1ἔ1ἕ1ἠ1ἡ1ἢ1ἣ1ἤ1ἥ1ἦ1ἧ1ἰ1ἱ1ἲ1ἳ1ἴ1ἵ1ἶ1ἷ1ὀ1ὁ1ὂ1ὃ1ὄ1ὅ1ὐ1ὑ1ὒ1ὓ1ὔ1ὕ1ὖ1ὗ1ὠ1ὡ1ὢ1ὣ1ὤ1ὥ1ὦ1ὧ1ὰ1ὲ1ὴ1ὶ1ὸ1ὺ1ὼ1ᾀ1ᾁ1ᾂ1ᾃ1ᾄ1ᾅ1ᾆ1ᾇ1ᾐ1ᾑ1ᾒ1ᾓ1ᾔ1ᾕ1ᾖ1ᾗ1ᾠ1ᾡ1ᾢ1ᾣ1ᾤ1ᾥ1ᾦ1ᾧ1ᾲ1ᾳ1ᾴ1ᾶ1ᾷ1ῂ1ῃ1ῄ1ῆ1ῇ1ῒ1ῖ1ῗ1ῢ1ῦ1ῧ1ῲ1ῳ1ῴ1ῶ1ῷ1ά1έ1ή1ί1ό1ύ1ώ1ΐ1ΰ1ά1έ1ή1ί1ό1ύ1ώ1ΐ1ΰ16'6ʼ6᾿",
|
||||
3 : "α2ια2ία2ία2ὶα2ῖα2ἰα2ἴα2ἲα2ἶα2ἱα2ἵα2ἳα2ἷά2ιά2ιά2ϊά2ϊα2υα2ύα2ύα2ὺα2ῦα2ὐα2ὔα2ὒα2ὖα2ὑα2ὕα2ὓα2ὗά3υά3υε2ιε2ίε2ίε2ὶε2ῖε2ἰε2ἴε2ἲε2ἶε2ἱε2ἵε2ἳε2ἷέ2ιέ2ιέ2ϊέ2ϊε2υε2ύε2ύε2ὺε2ῦε2ὐε2ὔε2ὒε2ὖε2ὑε2ὕε2ὓε2ὗέ3υέ3υη2υη2ύη2ύη2ὺη2ῦη2ὐη2ὔη2ὒη2ὖη2ὑη2ὕη2ὓη2ὗο2ιο2ίο2ίο2ὶο2ῖο2ἰο2ἴο2ἲο2ἶο2ἱο2ἵο2ἳο2ἷό2ιό2ιό2ϊό2ϊο2υο2ύο2ύο2ὺο2ῦο2ὐο2ὔο2ὒο2ὖο2ὑο2ὕο2ὓο2ὗό3υό3υυ2ιυ2ίυ2ίυ2ὶυ2ῖυ2ἰυ2ἴυ2ἲυ2ἶυ2ἱυ2ἵυ2ἳυ2ἷα2ηα2ϊα2ϋά3ϋά3ϋε2ηέ2ηέ2ηε2ϊε2ϋό2ηό2ηο2ϊω2ιὠ2ιι2αι2άι2άι2ὰι2ᾶι2ει2έι2έι2ὲι2οι2όι2όι2ὸι2ωι2ώι2ώι2ὼι2ῶ_ί3_ί3_ῖ3_ἰ3_ἱ3η2αῃ2αη2άη2άη2ὰη2ᾶῃ2άῃ2άῃ2ὰῃ2ᾶη2εῃ2εη2έη2έη2ὲῃ2έῃ2έῃ2ὲη2οῃ2οη2όη2όη2ὸῃ2όῃ2όῃ2ὸη2ωῃ2ωη2ώη2ώη2ὼη2ῶῃ2ώῃ2ώῃ2ὼῃ2ῶ_ή3_ή3_ῆ3_ἠ3_ἡ3υ2αυ2άυ2άυ2ὰυ2ᾶυ2ευ2έυ2έυ2ὲυ2ου2όυ2όυ2ὸυ2ωυ2ώυ2ώυ2ὼυ2ῶ_ύ3_ύ3_ῦ3_ὑ36β_6γ_6δ_6ζ_6θ_6κ_6λ_6μ_6ν_6ξ_6π_6ρ_6σ_6ϲ_6ς_6τ_6φ_6χ_6ψ_6β'6βʼ6β᾿6γ'6γʼ6γ᾿6δ'6δʼ6δ᾿6ζ'6ζʼ6ζ᾿6θ'6θʼ6θ᾿6κ'6κʼ6κ᾿6λ'6λʼ6λ᾿6μ'6μʼ6μ᾿6ν'6νʼ6ν᾿6ξ'6ξʼ6ξ᾿6π'6πʼ6π᾿6ρ'6ρʼ6ρ᾿6σ'6σʼ6σ᾿6ϲ'6ϲʼ6ϲ᾿6τ'6τʼ6τ᾿6φ'6φʼ6φ᾿6χ'6χʼ6χ᾿6ψ'6ψʼ6ψ᾿_β6_γ6_δ6_ζ6_θ6_κ6_λ6_μ6_ν6_ξ6_π6_ρ6_σ6_ϲ6_τ6_φ6_χ6_ψ6",
|
||||
4 : "ά3η_ά3η_ά3ι_ά3ι_ά3ϊ_ά3ϊ_ό2ειό2ειό3η_ό3η_ό3ι_ό3ι_ό3ϊ_ό3ϊ_6γκ_6μπ_6ντ_6τζ_6τσ_6τϲ_6τς_6μπ'6μπʼ6μπ᾿6ντ'6ντ’6ντ᾿6τζ'6τζʼ6τζ᾿6τσ'6τσʼ6τσ᾽6τϲ'6τϲʼ6τϲ᾿4β1β4γ1γ4δ1δ4ζ1ζ4θ1θ4κ1κ4λ1λ4μ1μ4ν1ν4π1π4ρ1ρ4ῤ1ῥ4σ1σ4ϲ1ϲ4τ1τ4φ1φ4χ1χ4ψ1ψ4β1ζ4β1θ4β1κ4β1μ4β1ν4β1ξ4β1π4β1σ4β1ϲ4β1τ4β1φ4β1χ4β1ψ4γ1β4γ1ζ4γ1θ4γ1κ4γ1μ4γ1ξ4γ1π4γ1σ4γ1ϲ4γ1τ4γ1φ4γ1χ4γ1ψ4δ1β4δ1γ4δ1ζ4δ1θ4δ1κ4δ1λ4δ1ξ4δ1π4δ1σ4δ1ϲ4δ1τ4δ1φ4δ1χ4δ1ψ4ζ1β4ζ1γ4ζ1δ4ζ1θ4ζ1κ4ζ1λ4ζ1μτζ2μ4ζ1ν4ζ1ξ4ζ1π4ζ1ρ4ζ1σ4ζ1ϲ4ζ1τ4ζ1φ4ζ1χ4ζ1ψ4θ1β4θ1γ4θ1δ4θ1ζ4θ1κ4θ1μσθ2μϲθ2μ4θ1ξ4θ1π4θ1σ4θ1ϲ4θ1τ4θ1φ4θ1χ4θ1ψ4κ1β4κ1γ4κ1δ4κ1ζ4κ1θ4κ1ξ4κ1π4κ1σ4κ1ϲ4κ1φ4κ1χ4κ1ψ4λ1β4λ1γ4λ1δ4λ1ζ4λ1θ4λ1κ4λ1μ4λ1ν4λ1ξ4λ1π4λ1ρ4λ1σ4λ1ϲ4λ1τ4λ1φ4λ1χ4λ1ψ4μ1β4μ1γ4μ1δ4μ1ζ4μ1θ4μ1κ4μ1λ4μ1ξ4μ1π4μ1ρ4μ1σ4μ1ϲ4μ1τ4μ1φ4μ1χ4μ1ψ4ν1β4ν1γ4ν1δ4ν1ζ4ν1θ4ν1κ4ν1λ4ν1μ4ν1ξ4ν1π4ν1ρ4ν1σ4ν1ϲ4ν1τ4ν1φ4ν1χ4ν1ψ4ξ1β4ξ1γ4ξ1δ4ξ1ζ4ξ1θ4ξ1κ4ξ1λ4ξ1μ4ξ1ν4ξ1π4ξ1ρ4ξ1σ4ξ1ϲ4ξ1τ4ξ1φ4ξ1χ4ξ1ψ4π1β4π1γ4π1δ4π1ζ4π1θ4π1κ4π1μ4π1ξ4π1σ4π1ϲ4π1φ4π1χ4π1ψ4ρ1β4ρ1γ4ρ1δ4ρ1ζ4ρ1θ4ρ1κ4ρ1λ4ρ1μ4ρ1ν4ρ1ξ4ρ1π4ρ1σ4ρ1ϲ4ρ1τ4ρ1φ4ρ1χ4ρ1ψ4σ1δ4ϲ1δ4σ1ζ4ϲ1ζ4σ1ν4ϲ1ν4σ1ξ4ϲ1ξ4σ1ρ4ϲ1ρ4σ1ψ4ϲ1ψ4τ1β4τ1γ4τ1δ4τ1θ4τ1ν4τ1ξ4τ1π4τ1φστ2φϲτ2φ4τ1χ4τ1ψ4φ1β4φ1γ4φ1δ4φ1ζ4φ1κ4φ1ν4φ1ξ4φ1π4φ1σ4φ1ϲ4φ1χ4φ1ψ4χ1β4χ1γ4χ1δ4χ1ζ4χ1κ4χ1μ4χ1ξ4χ1π4χ1σ4χ1ϲ4χ1φ4χ1ψ4ψ1β4ψ1γ4ψ1δ4ψ1ζ4ψ1θ4ψ1κ4ψ1λ4ψ1μ4ψ1ν4ψ1ξ4ψ1π4ψ1ρ4ψ1σ4ψ1ϲ4ψ1τ4ψ1φ4ψ1χβγ2κσγ2κϲγ2κσμ2πϲμ2πμν2τσν2τϲν2τ6βρ_6γλ_6κλ_6κτ_6κς_6κϲ_6κσ_6λς_6λϲ_6λσ_6μς_6μϲ_6μσ_6νς_6νϲ_6νσ_6ρς_6ρϲ_6ρσ_6σκ_6ϲκ_6στ_6ϲτ_6τλ_6τρ_6φτ_6χτ_",
|
||||
5 : "ο3ϊ3όο3ϊ3όο3ϊ3ὸβ5ν2τζ5ν2τλ5ν2τρ5ν2τ",
|
||||
6 : "4ρ5γ2μ4ρ5θ2μ4λ5κ2μ4ρ5κ2μ4γ5κ2φ4ν5κ2φ4γ5ξ2τ4ρ5ξ2τ4ρ5φ2ν4ρ5χ2μ4μ5ψ2τ4λ5γ2κ4ν5γ2κ4ρ5γ2κ4τ5γ2κ4ζ5μ2π4λ5μ2π4ν5μ2π4ρ5μ2πἄ5μ2ακἀ5μ2πρὄ5μ2ποὀ5μ2ποὀ5ν2τάὀ5ν2τάὀ5ν2τὰὀ5ν2τᾶ6μ2πλ_6μ2πν_6μ2πρ_",
|
||||
7 : "ἰ5γ2κου_ξε5γ2κ_ξέ5γ2κ_ξέ5γ2κ_σι5γ2κ_ϲι5γ2κἀ5μ2πάκἀ5μ2πάκἀ5μ2πανἀ5μ2πάρἀ5μ2πάρἀ5μ2πᾶρἀ5μ2παρἀρα5μ2πἰ5μ2πρα_κε5μ2π_λό5μ2π_λό5μ2π5μ2πέη_5μ2πέη_5μ2πεη_5μ2πογι_ξε5μ2π_ξέ5μ2π_ξέ5μ2π_ρε5μ2π_ρέ5μ2π_ρέ5μ2π_ρο5μ2πρό5μ2παρό5μ2παρό5μ2περό5μ2περό5μ2πωρό5μ2πωρο5μ2πῶρο5μ2παρο5μ2περο5μ2πωσό5μ2πασό5μ2παϲό5μ2παϲό5μ2πασό5μ2πεσό5μ2πεϲό5μ2πεϲό5μ2πεσο5μ2πῶϲο5μ2πῶσό5μ2πωσό5μ2πωϲό5μ2πωϲό5μ2πωσο5μ2παϲο5μ2πασο5μ2πεϲο5μ2πεσο5μ2πωϲο5μ2πω_τα5μ2π_χα5μ2π_χό5μ2π_χό5μ2π_ξε5ν2τ_ξέ5ν2τ_ξέ5ν2τ6γ2κ1τζ6γ2κ1τσ6γ2κ1τϲ6μ2π1τζ6μ2π1τσ6μ2π1τϲ6τσ5γ2κ6τϲ5γ2κ6τσ5μ2π6τϲ5μ2π6τσ5ν2τ6τϲ5ν2τ",
|
||||
8 : "ἐμι5γ2κρ_μπα5γ2κ_μπι5γ2κ_σπά5γ2κ_σπά5γ2κ_ϲπά5γ2κ_ϲπά5γ2κ_σπα5γ2κ_ϲπα5γ2κ_φιό5γ2κ_φιό5γ2κ_φιο5γ2κἀ6μ3πάριἀ6μ3πάριἀ6μ3παρι_γά5μ2πι_γά5μ2πι_γα5μ2πι_ζεϊ5μ2π_κό5μ2πρ_κό5μ2πρ_κο5μ2πρ_λι5μ2πρ5μ2πέης_5μ2πέης_5μ2πέηϲ_5μ2πέηϲ_5μ2πεης_5μ2πεηϲ_5μ2πέησ_5μ2πέησ_5μ2πεησ__μπι5μ2π_τρο6μ3π_τρό6μ3π_τρό6μ3π_ρου5μ2π_σέ5μ2πρ_σέ5μ2πρ_ϲέ5μ2πρ_ϲέ5μ2πρ_σνο5μ2π_ϲνο5μ2π_σού5μ2π_σού5μ2π_ϲού5μ2π_ϲού5μ2π_σου5μ2π_ϲου5μ2π_τζά5μ2π_τζά5μ2π_τζα5μ2π_τζι5μ2π_τό5μ2πρ_τό5μ2πρ_το5μ2πρ_φρα5μ2πἀ5ν2τάτζἀ5ν2τάτζ_βί5ν2τε_βί5ν2τε_βι5ν2τε_κα5ν2τρ_μαϊ5ν2τ_μπε5ν2τ_μπι5ν2τ_ντα5ν2τ5ν2τίβαν5ν2τίβαν_ρε5ν2τί_ρε5ν2τί_ρε5ν2τι_ροῦ5ν2τ_ρού5ν2τ_ρού5ν2τ_χα5ν2το_χα5ν2τρ_χά5ν2τρ_χά5ν2τρ6γ2κ5μ2π6γ2κ5ν2τ6μ2π5ν2τ6ν2τ5μ2π",
|
||||
9 : "5γ2κραντ_ἴντρι5γ2κἰντρι5γ2κ_μα5γ2κιό_μα5γ2κιό_ντά5γ2κλ_ντά5γ2κλ_ντα5γ2κλἀλα5μ2πουἀρλού5μ2πἀρλού5μ2πἀρλοῦ5μ2πἀρλου5μ2π_βό5μ2πιρ_βό5μ2πιρ_βο5μ2πιρ_κα5μ2πάδ_κα5μ2πάδ_κα5μ2πίν_κα5μ2πίν_κα5μ2πῖν_κα5μ2πιν_κά5μ2ποτ_κά5μ2ποτ_κα5μ2πότ_κα5μ2πότ_κα5μ2ποτ_καου5μ2π_καρα5μ2π5μ2πα5μ2π5μ2πά5μ2π5μ2πά5μ2π5μ2πέ5μ2π5μ2πέ5μ2π5μ2πε5μ2π_νό5μ2πελ_νό5μ2πελ_νο5μ2πελ_ντό5μ2πρ_ντό5μ2πρ_ντο5μ2πρ_σα2μ5ποτ_ϲα2μ5ποτ_τε5μ2πεσ_τε5μ2πεϲ_τζου5μ2π_τσά5μ2πα_τσά5μ2πα_τϲά5μ2πα_τϲά5μ2πα_τσα5μ2πα_τϲα5μ2παἀτρα5ν2τέἀτρα5ν2τέἀτρα5ν2τὲ_γιβε5ν2τ_γκάι5ν2τ_γκάι5ν2τ_γκάϊ5ν2τ_γκάϊ5ν2τ_γκαϊ5ν2τ_κα5ν2ταΐ_κα5ν2ταΐ_κα5ν2ταϊ_μα5ν2τάμ_μα5ν2τάμ_μα5ν2τὰμ_μα5ν2ταμ_μα5ν2τέμ_μα5ν2τέμ_μα5ν2τεμ_μεϊ5ν2τά_μεϊ5ν2τά_μεϊ5ν2τα_μο5ν2τέλ_μο5ν2τέλ_μο5ν2τελμο5ν2τέρνμο5ν2τέρνμο5ν2τερν_νισα5ν2τ_νιϲα5ν2τ_ρεζε5ν2τ_σε5ν2τέφ_σε5ν2τέφ_ϲε5ν2τέφ_ϲε5ν2τέφ_σε5ν2τεφ_ϲε5ν2τεφ_σε5ν2τοῦ_ϲε5ν2τοῦ_σε5ν2τού_σε5ν2τού_ϲε5ν2τού_ϲε5ν2τού_σε5ν2του_ϲε5ν2του_τσα5ν2τί_τσα5ν2τί_τϲα5ν2τί_τϲα5ν2τί_τσα5ν2τι_τϲα5ν2τι",
|
||||
10 : "_γιου5γ2κο_καρα5γ2κι_χούλι5γ2κ_χούλι5γ2κ_χουλι5γ2κ_γιαρα5μ2π_καλα5μ2πα_καλί5μ2πρ_καλί5μ2πρ_καλι5μ2πρ_κα5μ2παρέ_κα5μ2παρέ_κα5μ2παρὲ_κα5μ2παρε_καρνα5μ2π_κολι5μ2πρ_κου5μ2πού_κου5μ2πού_κου5μ2ποῦ_κου5μ2που5μ2πέηδες_5μ2πέηδες_5μ2πέηδεϲ_5μ2πέηδεϲ_5μ2πέηδεσ_5μ2πέηδεσ_5μ2πέηδων_5μ2πέηδων__μπό5μ2πιρ_μπό5μ2πιρ_μπο5μ2πιρ_μπο5μ2πότ_μπο5μ2πότ_μπο5μ2ποτ_σκα5μ2παβ_ϲκα5μ2παβ_ταβλα5μ2π_τζανα5μ2π_τρα5μ2πάλ_τρα5μ2πάλ_τρα5μ2παλ_φά5μ2πρικ_φά5μ2πρικ_φα5μ2πρικ_μπαλά5ν2τ_μπαλά5ν2τ_μπαλα5ν2τ_μπα5ν2ταν_μπου5ν2τα_μπου5ν2τρ",
|
||||
11 : "_καρα6μ3πόλ_καρα6μ3πόλ_καρα6μ3πολ_κολού5μ2πρ_κολού5μ2πρ_κολοῦ5μ2πρ_κολου5μ2πρ_κο6μ3πρέσσ_κο6μ3πρέσσ_κο6μ3πρέϲϲ_κο6μ3πρέϲϲ_κο6μ3πρεσσ_κο6μ3πρεϲϲ_κοντρα5μ2π_κωλού5μ2πρ_κωλού5μ2πρ_κωλοῦ5μ2πρ_κωλου5μ2πρ_μανιτό5μ2π_μανιτό5μ2π_μπα6μ3πάκι_μπα6μ3πάκι_μπα6μ3πακι_ρεπού5μ2πλ_ρεπού5μ2πλ_ρεπου5μ2πλ_τα6μ3περαμ_τα6μ3ποῦρλ_τα6μ3πούρλ_τα6μ3πούρλ_τρα5μ2ποῦκ_τρα5μ2πούκ_τρα5μ2πούκ_τρα5μ2πουκ_τσι5μ2πούκ_τσι5μ2πούκ_τϲι5μ2πούκ_τϲι5μ2πούκ_τσι5μ2πουκ_τϲι5μ2πουκ_τσι5μ2πούσ_τσι5μ2πούσ_τϲι5μ2πούϲ_τϲι5μ2πούϲ_τσι5μ2πουσ_τϲι5μ2πουϲ_γιαχου5ν2τ",
|
||||
12 : "_σαλτιπά5γ2κ_σαλτιπά5γ2κ_ϲαλτιπά5γ2κ_ϲαλτιπά5γ2κ_κουλού5μ2πρ_κουλού5μ2πρ_κουλοῦ5μ2πρ_κουλου5μ2πρ_μπου5μ2πούν_μπου5μ2πούν_μπου5μ2ποῦν_μπου5μ2πουν_χοντρο5μ2πα_λικβι5ν2ταρ_ντερμπε5ν2τ_ντου5ν2τούκ_ντου5ν2τούκ_ντου5ν2τοῦκ_ντου5ν2τουκ_φαστφου5ν2τ_φαϲτφου5ν2τ",
|
||||
13 : "_μπασκε2τ5μ2π_μπαϲκε2τ5μ2π_μπασι5μ2πουζ_μπαϲι5μ2πουζ"
|
||||
}
|
||||
};
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -1,17 +0,0 @@
|
||||
Hyphenator.languages['fi'] = {
|
||||
leftmin : 2,
|
||||
rightmin : 2,
|
||||
shortestPattern : 2,
|
||||
longestPattern : 7,
|
||||
specialChars : "öäå",
|
||||
patterns : {
|
||||
3 : "1ba1be1bi1bo1bu1by1da1de1di1do1du1dy1dä1dö1fa1fe1fi1fo1fu1fy1ga1ge1gi1go1gu1gy1gä1gö1ha1he1hi1ho1hu1hy1hä1hö1ja1je1ji1jo1ju1jy1jä1jö1ka1ke1ki1ko1ku1ky1kä1kö1la1le1li1lo1lu1ly1lä1lö1ma1me1mi1mo1mu1my1mä1mö1na1ne1ni1no1nu1ny1nä1nö1pa1pe1pi1po1pu1py1pä1pö1ra1re1ri1ro1ru1ry1rä1rö1sa1se1si1so1su1sy1sä1sö1ta1te1ti1to1tu1ty1tä1tö1va1ve1vi1vo1vu1vy1vä1vöä2yo1yö2ya1äa1öo1äo1öä2äö2öä2öö2ä_ä2u2sb2lb2rd2rf2lf2rg2lg2rk2lp2lp2rc2lq2v",
|
||||
4 : "y1a2y1o2u1y2y1u2ö3a2ö3o2ä3a2ä3o2ä1u2ö1u2u1ä2u1ö2e1aai1aao1aau1aau1eea1uui1uue1uuo1uuää1iää1eää3yi1ääe1ääy1ääi1ööa1eia1oie1aii1auy1eiai1aai1eai1oai1uau1aau1eeu1aie1aie1oie1yiu1aiu1eiu1ooi1aoi1eoi1ooi1uo1uiou1eou1oue1aui1euo1auo1ue1ö2ö1e2r2asl2as1k2vsc2hts2h",
|
||||
5 : "1st2raa1i2aa1e2aa1o2aa1u2ee1a2ee1i2ee1u2ee1y2ii1a2ii1e2ii1o2uu1a2uu1e2uu1o2uu1i2io1a2io1e2keus11b2lo1b2ri1b2ro1b2ru1d2ra1f2la1f2ra1f2re1g2lo1g2ra1k2ra1k2re1k2ri1k2va1p2ro1q2vich2r",
|
||||
6 : "1sp2lialous1rtaus1perus12s1ase2s1apuulo2s1bib3li",
|
||||
7 : "yli1o2pali1a2v2s1ohje1a2sian1a2siat1a2sioi2s1o2sa2n1o2sa_ydi2n12n1otto2n1oton2n1anto2n1anno2n1aika2n1a2jo2s1a2jo",
|
||||
8 : "2s1a2sia2n1o2pet2s1a2loialkei2s12n1e2dus2s1ajatu2s1y2rit2s1y2hti2n1a2jan2n1o2mai2n1y2lit2s1a2len2n1a2len",
|
||||
9 : "2s1o2pisk2n1o2pist2s1o2pist2s1i2dea_2s1i2dean2s1e2sity_suu2r1a2",
|
||||
11 : "1a2siaka2s1"
|
||||
}
|
||||
};
|
@ -1,26 +0,0 @@
|
||||
// The french hyphenation patterns are retrieved from
|
||||
// http://tug_org/svn/texhyphen/trunk/collaboration/repository/hyphenator/
|
||||
Hyphenator.languages['fr'] = {
|
||||
leftmin : 2,
|
||||
rightmin : 3,
|
||||
shortestPattern : 1,
|
||||
longestPattern : 14,
|
||||
specialChars : "àâçèéêîïôûœ’'",
|
||||
patterns : {
|
||||
2 : "1ç1j1q",
|
||||
3 : "1gè’â41zu1zo1zi1zè1zé1ze1za’y4_y41wu1wo1wi1we1wa1vy1vû1vu1vô1vo1vî1vi1vê1vè1vé1ve1vâ1va’û4_û4’u4_u41ba1bâ1ty1be1bé1bè1bê1tû1tu1tô1bi1bî1to1tî1ti1tê1tè1té1te1tà1tâ1ta1bo1bô1sy1sû1su1sœ1bu1bû1by2’21ca1câ1sô1ce1cé1cè1cê1so1sî1si1sê1sè1sé1se1sâ1sa1ry1rû1ru1rô1ro1rî1ri1rê1rè1ré1re1râ1ra’a41py1pû1pu1pô1po1pî1pi1pê1pè1pé1pe1pâ1pa_ô41ci1cî’ô4’o4_o41nyn1x1nû1nu1nœ1nô1no1nî1ni1nê1nè1né1ne1nâ1co1cô1na1my1mû1mu1mœ1mô1mo1mî1mi1cœ1mê1mè1mé1me1mâ1ma1ly1lû1lu1lô1lo1lî1li1lê1lè1cu1cû1cy1lé1d’1da1dâ1le1là1de1dé1dè1dê1lâ1la1ky1kû1ku1kô1ko1kî1ki1kê1kè1ké1ke1kâ1ka2jk_a4’î4_î4’i4_i41hy1hû1hu1hô1ho1hî1hi1hê1hè1hé1he1hâ1ha1gy1gû1gu1gô1go1gî1gi1gê_â41gé1ge1gâ1ga1fy1di1dî1fû1fu1fô1fo’e41fî1fi1fê1fè1do1dô1fé1fe1fâ1fa’è41du1dû1dy_è4’é4_é4’ê4_ê4_e41zy",
|
||||
4 : "1f2lab2h2ckg2ckp2cksd1s22ckb4ck_1c2k2chw4ze_4ne_2ckt1c2lad2hm1s22cht2chsch2r2chp4pe_1t2r1p2h_ph44ph_ph2l2phnph2r2phs1d2r2pht2chn4fe_2chm1p2l1p2r4me_1w2rch2l2chg1c2r2chb4ch_1f2r4le_4re_4de_f1s21k2r4we_1r2h_kh44kh_1k2h4ke_1c2h_ch44ge_4je_4se_1v2r_sh41s2h4ve_4sh_2shm2shr2shs4ce_il2l1b2r4be_1b2l4he_4te__th41t2h4th_g1s21g2r2thl1g2l2thm2thnth2r1g2n2ths2ckf",
|
||||
5 : "2ck3h4rhe_4kes_4wes_4res_4cke_éd2hi4vre_4jes_4tre_4zes_4ges_4des_i1oxy4gle_d1d2h_cul44gne_4fre_o1d2l_sch44nes_4les_4gre_1s2ch_réu24sch_4the_1g2hy4gue_2schs4cle_1g2ho1g2hi1g2he4ses_4tes_1g2ha4ves_4she_4che_4cre_4ces_t1t2l4hes_l1s2t4bes_4ble__con4xil3lco1ap4que_vil3l4fle_co1arco1exco1enco1auco1axco1ef4pes_co1é2per3h4mes__pe4r4bre_4pre_4phe_1p2né4ple__dé2smil3llil3lhil3l4dre_cil3lgil3l4fes_",
|
||||
6 : "’in1o2rcil4l4phre_4dres_l3lioni1algi2fent_émil4l4phle_rmil4l4ples_4phes_1p2neuextra14pres_y1asthpé2nul2xent__mé2sa2pent_y1algi4chre_1m2nès4bres_1p2tèr1p2tér4chle_’en1o24fles_oxy1a2avil4l_en1o24ques_uvil4lco1a2d4bles__in1a2’in1a21s2por_cons4_bi1u2’as2ta_in1e2’in1e2_in1é2’in1é21s2lov1s2lavco1acq2cent__as2ta_co1o24ches_hémi1é_in2er’in2er2s3homo1ioni_in1i2’in1i22went_4shes__ré1a2_ré1é2_ré1e2_ré2el_in1o2ucil4lco1accu2s3tr_ré2er_ré2èr4cles_2vent__ré1i22sent_2tent_2gent__ré1o24gues__re1s24sche_4thes_’en1a2e2s3ch4gres_1s2cop2lent__en1a22nent__in1u2’in1u24gnes_4cres_wa2g3n4fres_4tres_4gles_1octet_dé1o2_dé1io4thre__bi1au2jent__dé1a22zent_4vres_2dent_4ckes_4rhes__dy2s3sub1s22kent_2rent_2bent_3d2hal",
|
||||
7 : "a2g3nos3d2houdé3rent__dé3s2t_dé3s2pé3dent_2r3heur2r3hydri1s2tat2frent_io1a2ctla2w3re’in2u3l_in2u3l2crent_’in2uit_in2uit1s2caph1s2clér_ré2ussi2s3ché_re2s3t_re2s3s4sches_é3cent__seu2le’in2ond_in2ond’in2i3t_in2i3t’in2i3q_ré2aux_in2i3q2shent__di1alduni1a2x’in2ept2flent__in2eptuni1o2v2brent_co2nurb2chent_2quent_1s2perm1s2phèr_ma2c3kuevil4l1s2phér1s2piel1s2tein1s2tigm4chles_1s2tock1s2tyle1p2sych_pro1é2_ma2r1x_stil3lpusil3libril3lcyril3l_pré1s2thril3l_mé3san_pré1u2_mé2s1i_pré1o2_pré1i2piril3lpupil3lâ2ment__pré1e2_pré1é2_pré2au_pré1a22prent_2vrent_supero2_di1e2npoly1u2è2ment_poly1s2poly1o2poly1i2poly1è2poly1é2poly1e2poly1a2supe4r1capil3l2plent_armil5lsemil4lmil4letvacil4l_di2s3h3ph2tis2dlent_a2s3tro4phres_l2ment_i1è2drei1arthr2drent_4phles_supers2ô2ment_extra2i2phent_su3r2ah_su2r3hextra2chypo1u21alcool_per1u2_per1o2_per1i2_per1é2hypo1s2_per1a2hypo1o2hypo1i2hypo1é2_pen2tahypo1e2hypo1a2y1s2tome2s3cophyperu2hype4r1hypers2hypero21m2némohyperi21m2nési4chres_a1è2drehyperé2hypere2hypera2’oua1ou_oua1ouo1s2tomo1s2timo1s2tato1s2tasomni1s2tung2s3_dé3s2c2blent__bio1a2télé1e2télé1i22clent_télé1s22guent_1é2nerg2grent_2trent__dé2s1œ2t3heuro1è2dre2gnent_2glent_4thres__bi1a2t1é2drie_bi1a2c_i2g3nin3s2at_’i2g3ni2ckent__i2g3né’ab3réa’i2g3né_ab3réa_per1e2",
|
||||
8 : "_ma2l1ap_dy2s1u2_dy2s1o2_dy2s1i2n3s2ats__dy2s1a2distil3l1é2lectrinstil3l1s2trophe2n1i2vro2b3long1s2tomos_ae3s4ch’ae3s4ch_eu2r1a2ombud2s3’eu2r1a2_mono1s2_mono1u2o1s2téro_mono1o2eu1s2tato1s2tradfritil3la2l1algi_mono1i2_mono1é2_ovi1s2c’ovi1s2c_mono1e2_mono1a2co1assocpaléo1é2boutil3l1s2piros_ré2i3fi_pa2n1ischevil4l1s2patiaca3ou3t2_di1a2cé_para1s2_pa2r3héco1assur_su2b1é2tu2ment_su2ment__su2b1in_su2b3lupapil3lire3pent_’inte4r3_su2b1urab3sent__su2b1a2di2s3cophu2ment_fu2ment__intera2au2ment_as2ment_or2ment_’intera2_intere2pé1r2é2q_péri1os_péri1s2ja3cent__anti1a2_péri1u2’anti1a2er2ment__anti1e2ac3cent_ar2ment_to2ment_’intere2ré3gent_papil3leom2ment_’anti1e2photo1s2_anti1é2_interé2’anti1é2_anti1s2’anti1s23ph2talé’interé2ri2ment__interi2’interi2mi2ment_apo2s3tri2s3chio_pluri1ai2s3chia_intero2’intero2_inte4r3po1astre_interu2’interu2_inters2ai2ment_’inters2papil3la_tri1o2n_su2r1a2_pon2tet_pos2t3h_dés2a3mes3cent__pos2t3r_post1s2_tri1a2tta2ment__tri1a2nra2ment_is3cent__su2r1e2_tri1a2cfa2ment_da2ment__su3r2et_su2r1é2_mé2s1es_mé2g1oh_su2r1of_su2r1ox_re3s4ty_re3s4tu_ma2l1oc’a2g3nat_dé2s1é2_ma2l1entachy1a2_pud1d2ltchin3t2_re3s4trtran2s3p_bi2s1a2tran2s3hhémo1p2té3quent__a2g3nat_dé2s1i2télé1o2bo2g3nosiradio1a2télé1o2ppu2g3nacru3lent__sta2g3nre3lent__ré2a3le_di1a2mi",
|
||||
9 : "_ré2a3lit_dé3s2o3lthermo1s2_dé3s2ist_dé3s2i3rmit3tent_éni3tent__do3lent__ré2a3lisopu3lent__pa3tent__re2s3cap_la3tent__co2o3lie_re2s3cou_re2s3cri_ma2g3num_re2s3pir_dé3s2i3dco2g3nititran2s1a2tran2s1o2_dé3s2exu_re3s4tab_re3s4tag_dé3s2ert_re3s4tat_re3s4tén_re3s4tér_re3s4tim_re3s4tip_re3s4toc_re3s4toptran2s1u2_no2n1obs_ma2l1a2v_ma2l1int_prou3d2hpro2s3tativa3lent__ta3lent__rétro1a2_pro1s2cé_ma2l1o2dcci3dent__pa3rent__su2r1int_su2r1inf_su2r1i2mtor3rent_cur3rent__mé2s1u2stri3dent__dé3s2orm_su3r2ell_ar3dent__su3r2eaupru3dent__pré2a3lacla2ment__su3r2a3t_pos2t1o2_pos2t1inqua2ment_ter3gent_ser3gent_rai3ment_abî2ment_éci2ment_’ar3gent__ar3gent_rin3gent_tan3gent_éli2ment_ani2ment_’apo2s3ta_apo2s3tavélo1s2kivol2t1amp_dé3s2orp_dé2s1u2n_péri2s3ssesqui1a2’ana3s4trfir2ment_écu2ment_ser3pent_pré3sent_’ar3pent__ar3pent_’in1s2tab_in1s2tab’in2o3cul_in2o3culplu2ment_bou2ment_’in2exora_in2exora_su2b3linbru2ment__su3b2é3r_milli1am’in2effab_in2effab’in2augur_di1a2cid_in2augur_pa2n1opt’in2a3nit_in2a3nit1informat_ana3s4trvanil3lis_di1a2tom_su3b2altvanil3linstéréo1s2_pa2n1a2fo1s2tratuépi2s3cop_ci2s1alp1s2tructu1é2lément1é2driquepapil3lomllu2ment_",
|
||||
10 : "1s2tandardimmi3nent__émi3nent_imma3nent_réma3nent_épi3s4cope_in2i3miti’in2i3miti_res3sent_moye2n1â2gréti3cent__dé3s2a3crmon2t3réalinno3cent__mono1ï2dé_pa2n1a2méimpu3dent__pa2n1a2ra_amino1a2c’amino1a2c_pa2n1o2phinci3dent__ser3ment_appa3rent_déca3dent__dacryo1a2_dé3s2astr_re4s5trin_dé3s2é3gr_péri2s3ta_sar3ment__dé3s2oufr_re3s4tandchro2ment__com3ment__re2s3quil_re2s3pons_gem2ment__re2s3pect_re2s3ciso_dé3s2i3gn_dé3s2i3ligram2ment__dé3s2invo_re2s3cisitran3s2act’anti2enneindo3lent__sou3vent_indi3gent_dili3gent_flam2ment_impo3tent_inso3lent_esti2ment_’on3guent__on3guent_inti2ment__dé3s2o3défécu3lent_veni2ment_reli2ment_vidi2ment_chlo2r3é2tpu2g3nablechlo2r3a2cryth2ment_o2g3nomonicarê2ment__méta1s2ta_ma2l1aisé_macro1s2célo3quent_tran3s2ats_anti2enne",
|
||||
11 : "_contre1s2cperti3nent_conti3nent__ma2l1a2dro_in2é3lucta_psycho1a2n_dé3s2o3pil’in2é3luctaperma3nent__in2é3narratesta3ment__su2b3liminrésur3gent_’in2é3narraimmis4cent__pro2g3nathchien3dent_sporu4lent_dissi3dent_corpu3lent_archi1é2pissubli2ment_indul3gent_confi3dent__syn2g3nathtrucu3lent_détri3ment_nutri3ment_succu3lent_turbu3lent__pa2r1a2che_pa2r1a2chèfichu3ment_entre3gent_conni3vent_mécon3tent_compé3tent__re4s5trict_dé3s2i3nen_re2s3plend1a2nesthésislalo2ment__dé3s2ensib_re4s5trein_phalan3s2tabsti3nent_",
|
||||
12 : "polyva3lent_équiva4lent_monova3lent_amalga2ment_omnipo3tent__ma2l1a2dreséquipo3tent__dé3s2a3tellproémi3nent_contin3gent_munifi3cent__ma2g3nicideo1s2trictionsurémi3nent_préémi3nent__bai2se3main",
|
||||
13 : "acquies4cent_intelli3gent_tempéra3ment_transpa3rent__ma2g3nificatantifer3ment_",
|
||||
14 : "privatdo3cent_diaphrag2ment_privatdo3zent_ventripo3tent__contre3maître",
|
||||
15 : "grandilo3quent_",
|
||||
16 : "_chè2vre3feuille"
|
||||
}
|
||||
};
|
File diff suppressed because one or more lines are too long
@ -1,12 +0,0 @@
|
||||
// For questions about the Gujarati hyphenation patterns
|
||||
// ask Santhosh Thottingal (santhosh dot thottingal at gmail dot com)
|
||||
Hyphenator.languages['gu'] = {
|
||||
leftmin : 2,
|
||||
rightmin : 2,
|
||||
shortestPattern : 1,
|
||||
longestPattern : 1,
|
||||
specialChars : unescape("આઅઇઈઉઊઋએઐઔકગખઘઙચછજઝઞટઠડઢણતથદધનપફબભમયરલવશષસહળિીાુૂૃેાોૈૌ્ઃં%u200D"),
|
||||
patterns : {
|
||||
2 : "અ1આ1ઇ1ઈ1ઉ1ઊ1ઋ1એ1ઐ1ઔ1િ1ા1ી1ુ1ૂ1ૃ1ે1ો1ૌ1્2ઃ1ં11ક1ગ1ખ1ઘ1ઙ1ચ1છ1જ1ઝ1ઞ1ટ1ઠ1ડ1ઢ1ણ1ત1થ1દ1ધ1ન1પ1ફ1બ1ભ1મ1ય1ર1લ1વ1શ1ષ1સ1હ1ળ"
|
||||
}
|
||||
};
|
@ -1,12 +0,0 @@
|
||||
// For questions about the Hindi hyphenation patterns
|
||||
// ask Santhosh Thottingal (santhosh dot thottingal at gmail dot com)
|
||||
Hyphenator.languages['hi'] = {
|
||||
leftmin : 2,
|
||||
rightmin : 2,
|
||||
shortestPattern : 1,
|
||||
longestPattern : 1,
|
||||
specialChars : unescape("आअइईउऊऋऎएऐऒऔकगखघङचछजझञटठडढणतथदधनपफबभमयरलवशषसहळऴऱिीाुूृॆेॊाोैौ्ःं%u200D"),
|
||||
patterns : {
|
||||
2 : "अ1आ1इ1ई1उ1ऊ1ऋ1ऎ1ए1ऐ1ऒ1औ1ि1ा1ी1ु1ू1ृ1ॆ1े1ॊ1ो1ौ1्2ः1ं11क1ग1ख1घ1ङ1च1छ1ज1झ1ञ1ट1ठ1ड1ढ1ण1त1थ1द1ध1न1प1फ1ब1भ1म1य1र1ल1व1श1ष1स1ह1ळ1ऴ1ऱ"
|
||||
}
|
||||
};
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -1,20 +0,0 @@
|
||||
Hyphenator.languages['it'] = {
|
||||
leftmin : 2,
|
||||
rightmin : 2,
|
||||
shortestPattern : 1,
|
||||
longestPattern : 8,
|
||||
specialChars : "àéèìòù’'",
|
||||
// The italian hyphenation patterns are retrieved from
|
||||
// http://www.ctan.org/tex-archive/language/hyphenation/ithyph.tex
|
||||
patterns : {
|
||||
2 : "1b1c1d1f1g1h1j1k1l1m1n1p1q1r1t1v1w1x1z",
|
||||
3 : "2’2e2w2bb2bc2bd2bf2bm2bn2bp2bs2bt2bvb2lb2r2b_2b’2cb2cc2cd2cf2ck2cm2cn2cq2cs2ct2czc2hc2lc2r2c_2c’_c22db2dd2dg2dl2dm2dn2dpd2r2ds2dt2dv2dw2d_2d’_d22fb2fg2ff2fnf2lf2r2fs2ft2f_2f’2gb2gd2gf2ggg2hg2l2gmg2n2gpg2r2gs2gt2gv2gw2gz2g_2g’2hb2hd2hhh2l2hm2hn2hr2hv2h_2h’2j_2j’2kg2kfk2h2kkk2l2kmk2r2ks2kt2k_2k’2lb2lc2ld2lgl2h2lk2ll2lm2ln2lp2lq2lr2ls2lt2lv2lw2lz2l_2mb2mc2mf2ml2mm2mn2mp2mq2mr2ms2mt2mv2mw2m_2m’2nb2nc2nd2nf2ng2nk2nl2nm2nn2np2nq2nr2ns2nt2nv2nz2n_2n’2pdp2hp2l2pn2ppp2r2ps2pt2pz2p_2p’2qq2q_2q’2rb2rc2rd2rfr2h2rg2rk2rl2rm2rn2rp2rq2rr2rs2rt2rv2rx2rw2rz2r_2r’1s22sz4s_2tb2tc2td2tf2tgt2ht2l2tm2tn2tpt2rt2s2tt2tv2twt2z2t_2vcv2lv2r2vv2v_w2h2w_2w’2xb2xc2xf2xh2xm2xp2xt2xw2x_2x’y1i2zb2zd2zl2zn2zp2zt2zs2zv2zz2z_",
|
||||
4 : "_p2sa1iaa1iea1ioa1iua1uoa1ya2at_e1iuo1iao1ieo1ioo1iu2chh2chbch2r2chn2l’_2l’’2shm2sh_2sh’2s3s2stb2stc2std2stf2stg2stm2stn2stp2sts2stt2stv4s’_4s’’2tzktz2s2t’_2t’’2v’_2v’’wa2r2w1yy1ou2z’_2z’’_z2",
|
||||
5 : "_bio1_pre12gh2t2l3f2n2g3n3p2nes4s3mt2t3s",
|
||||
6 : "_a3p2n_anti1_free3_opto1_para1hi3p2n2nheit3p2sicr2t2s32s3p2n3t2sch",
|
||||
7 : "_ca4p3s_e2x1eu_narco1_su2b3r_wa2g3n_wel2t1n2s3fer",
|
||||
8 : "_contro1_fran2k3_li3p2sa_orto3p2_poli3p2_sha2re3_su2b3lu",
|
||||
9 : "_anti3m2n_circu2m1_re1i2scr_tran2s3c_tran2s3d_tran2s3l_tran2s3n_tran2s3p_tran2s3r_tran2s3t",
|
||||
10 : "_di2s3cine"
|
||||
}
|
||||
};
|
@ -1,13 +0,0 @@
|
||||
// For questions about the Kannada hyphenation patterns
|
||||
// ask Santhosh Thottingal (santhosh dot thottingal at gmail dot com)
|
||||
Hyphenator.languages['kn'] = {
|
||||
leftmin : 2,
|
||||
rightmin : 2,
|
||||
shortestPattern : 1,
|
||||
longestPattern : 1,
|
||||
specialChars : "ಆಅಇಈಉಊಋಎಏಐಒಔಕಗಖಘಙಚಛಜಝಞಟಠಡಢಣತಥದಧನಪಫಬಭಮಯರಲವಶಷಸಹಳಱಿೀಾುೂೃೆೇೊಾೋೈೌ್ಃಂ",
|
||||
patterns : {
|
||||
2 : "ಅ1ಆ1ಇ1ಈ1ಉ1ಊ1ಋ1ಎ1ಏ1ಐ1ಒ1ಔ1ೀ1ು1ೂ1ೃ1ೆ1ೇ1ೊ1ೋ1ೌ1್2ಃ1ಂ11ಕ1ಗ1ಖ1ಘ1ಙ1ಚ1ಛ1ಜ1ಝ1ಞ1ಟ1ಠ1ಡ1ಢ1ಣ1ತ1ಥ1ದ1ಧ1ನ1ಪ1ಫ1ಬ1ಭ1ಮ1ಯ1ರ1ಲ1ವ1ಶ1ಷ1ಸ1ಹ1ಳ1ಱ",
|
||||
3 : "2ಃ12ಂ1"
|
||||
}
|
||||
};
|
@ -1,22 +0,0 @@
|
||||
// Latin hyphenation patterns converted by
|
||||
// Pablo Rodríguez (hyphenator at pragmata dot tk)
|
||||
// based on LaTeX Latin hyphenation patterns by Claudio Beccari
|
||||
// (http://tug.ctan.org/tex-archive/language/hyphenation/lahyph.tex)
|
||||
Hyphenator.languages['la'] = {
|
||||
leftmin : 2,
|
||||
rightmin : 2,
|
||||
shortestPattern : 1,
|
||||
longestPattern : 8,
|
||||
specialChars : "æœ",
|
||||
patterns : {
|
||||
2 : "æ1œ11b1c1d1f1g1h1j1k1l1m1n1p1r1t1v1x1z",
|
||||
3 : "2bb2bdb2l2bm2bnb2r2bt2bs2b_2ccc2l2cm2cn2cqc2r2cs2ct2cz2c_2dd2dg2dmd2r2ds2dv2d_2fff2l2fnf2r2ft2f_2gg2gd2gfg2l2gmg2ng2r2gs2gv2g_2hp2ht2h_2kk2lb2lc2ld2lf2lg2lk2ll2lm2ln2lp2lq2lr2ls2lt2lv2l_2mm2mb2mp2ml2mn2mq2mr2mv2m_2nb2nc2nd2nf2ng2nl2nm2nn2np2nq2nr2ns2nt2nv2nx2n_p2hp2l2pn2ppp2r2ps2pt2pz2p_2rb2rc2rd2rf2rgr2h2rl2rm2rn2rp2rq2rr2rs2rt2rv2rz2r_1s22s_2tb2tc2td2tf2tgt2ht2lt2r2tm2tn2tp2tq2tt2tv2t_v2lv2r2vv2xt2xx2x_2z_",
|
||||
4 : "a1iaa1iea1ioa1iuae1aae1oae1ue1iuio1io1iao1ieo1ioo1iuuo3uc2h2k2h22php2pht1qu22s3s2stb2stc2std2stf2stg2stm2stn2stp2stq2sts2stt2stv2st_a1uaa1uea1uia1uoa1uue1uae1uee1uie1uoe1uui1uai1uei1uii1uoi1uuo1uao1ueo1uio1uoo1uuu1uau1ueu1uiu1uou1uu",
|
||||
5 : "_e2x1_o2b3l3f2tn2s3mn2s3f2s3ph2st3l",
|
||||
6 : "_a2b3l_anti13p2sic3p2neua2l1uaa2l1uea2l1uia2l1uoa2l1uue2l1uae2l1uee2l1uie2l1uoe2l1uui2l1uai2l1uei2l1uii2l1uoi2l1uuo2l1uao2l1ueo2l1uio2l1uoo2l1uuu2l1uau2l1ueu2l1uiu2l1uou2l1uua2m1uaa2m1uea2m1uia2m1uoa2m1uue2m1uae2m1uee2m1uie2m1uoe2m1uui2m1uai2m1uei2m1uii2m1uoi2m1uuo2m1uao2m1ueo2m1uio2m1uoo2m1uuu2m1uau2m1ueu2m1uiu2m1uou2m1uua2n1uaa2n1uea2n1uia2n1uoa2n1uue2n1uae2n1uee2n1uie2n1uoe2n1uui2n1uai2n1uei2n1uii2n1uoi2n1uuo2n1uao2n1ueo2n1uio2n1uoo2n1uuu2n1uau2n1ueu2n1uiu2n1uou2n1uua2r1uaa2r1uea2r1uia2r1uoa2r1uue2r1uae2r1uee2r1uie2r1uoe2r1uui2r1uai2r1uei2r1uii2r1uoi2r1uuo2r1uao2r1ueo2r1uio2r1uoo2r1uuu2r1uau2r1ueu2r1uiu2r1uou2r1uu",
|
||||
7 : "_para1i_para1u_su2b3r2s3que_2s3dem_",
|
||||
8 : "_su2b3lu",
|
||||
9 : "_anti3m2n_circu2m1_co2n1iun",
|
||||
10 : "_di2s3cine"
|
||||
}
|
||||
};
|
@ -1,17 +0,0 @@
|
||||
// For questions about the Lithuanian hyphenation patterns
|
||||
// ask Rogutės Sparnuotos (rogutes at googlemail dot com)
|
||||
Hyphenator.languages['lt'] = {
|
||||
leftmin : 2,
|
||||
rightmin : 2,
|
||||
shortestPattern : 2,
|
||||
longestPattern : 6,
|
||||
specialChars : "ščžįėūąęų",
|
||||
patterns : {
|
||||
3 : "a1ba1ca1da1ea1fa1ha1ja1ka1la1ma1na2oa1pa1ra1sa1ta4ua1va1wa1ya1za1ąa1ęa1įa1ša1ųa1žb1jcu4d4zd4že1ce1ee1fe1ge1he1je1ke1le1me2oe3pe1re1se1te1ve1we1ye1ze1ąe1če1ęe1ėe1še1ųe1ūe1ž3gui2ai1ci1di2ei1fi1ji1ki1li1mi2oi1pi1ri2ui1vi1wi1yi1zi2ąi1či1ęi1įi1ši2ųi2ūi3ž1ju1jū3kek4i3ko3ku5ką3lą3lųm2am2one15noo1ao1bo1co1do1eo1fo1go1ho1io1jo1ko1lo1mo1no1po1ro1so1vo1wo1yo1zo1ąo1čo1ęo1ėo1įo1šo1ųo1ūo1žpa3p2ep2i3pop2yp2ėr2ar2ir2or2u3rą5rųs2esu1są35sėt2at2it2o3tęu1bu1cu1du1fu1gu1hu1ju1ku1lu1mu1nu2ou3pu1ru1su1uu1vu1wu1yu1zu1ąu1ču1ęu1įu1šu1ųu1žv2e3vą3vėy1ay1by1cy1dy1ey1fy1gy1hy1iy1jy1ky1ly1my1ny1oy1py3ry1sy1ty1uy1vy1wy1yy1zy1ąy1čy1ęy1ėy1įy1šy1ųy1ūy1žą1aą1bą1cą1dą1eą1fą1gą1hą1ią1ją1ką1lą1mą1ną1oą1pą1rą1są1tą1uą1vą1wą1yą1zą1ąą1čą1ęą1ėą1įą1šą1ųą1ūą1žę1aę1bę1cę1dę1eę1fę1gę1hę1ię1ję1kę1lę1mę1nę1oę1pę1rę1sę1tę1uę1vę1wę1yę1zę1ąę1čę1ęę1ėę1įę1šę1ųę1ūę1žė1aė1bė1cė1dė1eė1fė1gė1hė1iė1jė1kė1mė1nė1oė1pė1rė1uė1vė1wė1yė1zė1ąė1čė1ęė1ėė1įė1šė1ųė1ūė1žį1aį1bį1cį2eį1fį4gį1hį1iį1jį2lį2mį1nį1oį4pį2rį1sį1uį2vį1wį1yį1zį1ąį1čį1ęį1įį1šį1ųį1ūį1žų1aų1bų1cų1dų1eų1fų1gų1hų1ių1jų1kų1lų1mų1nų1oų1pų1rų1sų1tų1uų1vų1wų1yų1zų1ąų1čų1ęų1ėų1įų1šų1ųų1ūų1žū1aū1bū1cū1dū1eū1fū1gū1hū1iū1jū1kū1lū1mū1nū1oū1pū1rū1sū1tū1uū1vū1wū1yū1zū1ąū1čū1ęū1ėū1įū1šū1ųū1ūū1žž1jž2u",
|
||||
4 : "_ap1_at1_nu1_sk4_st4_uk3_už1_įs4a1a2a2chae2l2a1ga4grak2la3lia3lya3lėa3lūap3la3pra4rgasi1a4ska3slato1atp42a1ča1ė2a3šnaš2va1ū2až2lba3c2b1b2b1c4b3dbep42b1f2b1g2b1h2b1k2b1lb2lab2lu2b1m2b1n2b1p2b2rb3ri2b1s2b3tbu4k2b1v2b1w2b1z2b1č2b1š2b1žcar42c1b2c1c2c1d2c1f2c1g3chi2c1k2c1l2c1m2c1n2c1p2c1r2c1s2c1tcuk52c1v2c1w2c1z2c1č2c1š2c1ž2d1b2d1c2d1dde4k2d1f2d1g2d1hdi4pdi4s2d1j2d3k4d3l2d1m2d1n2d1p2d1rdro1d2rė4drų2d1s2d1tdu2adu4k2d1vd3va2d1w2d1č2d1še1a2eat1e1b2ebe1e3bleb3re2che3d2e1ie1eine3kr2e1neno1ens4enu1e3orep4rero1erė2es4ke1u4euž3e3vie1į4eįp3e3šneš2vešė32f1b2f1c2f1d2f1f2f1g2f1h2f1k2f1l2f1m2f1n2f1p2f1rfri12f1s2f1t2f1v2f1w2f1z2f1č2f1š2f1žgaš32g1b2g1c2g1d2g1f2g1g2g1h2g1k2g1lg2leg2lo2g1m2g1n3g2rg3rig4rą5grį4grų2g1s4g1t2g1v2g1w2g1z2g1č2g1š2g1ž2h1b2h1c2h1d2h1f2h1g2h1hhi4b2h1k2h2l2h1mh2me2h1n2h1p2h1r2h1s2h1t2h1v2h1w2h1z2h1č2h1š2h1žia3kiap4i3ari3b2ice1id2rie3gi3ei2i1g2i1hi1i2ik3rilo11imd4i1n5inviok2i3ori3ri4i1si5sai5siis4ki3sli1ė2i3šniš5ti4švišė21išš3ja_2j1b2j1c2j1d2j1f2j1g2j1h2j1j2j1k2j1l2j1m2j1njot32j1p2j1r2j1s2j1t2j1v2j1w2j1z2j1č2j1š2j1ž3ka_kak33kar3kas5kav2k1b2k1c2k1d2k1f2k1g2k1h5ki_5kia3kib3kil5kit2k1k2k1lk2ly4klą4klų2k1m2k1n2k1p2k2rk3rok4ryk5rą2k1sk4skks3l4k1tk3ta4kubku4k2k1vk2vak3vok2vė2k1w3kyt2k1z2k1č2k1škšė32k1ž3la_2l1blb3r2l1c2l1d3le_le3c4leč2l1f2l1g2l1h3li_li4a3lių2l1j2l1klko12l1l2l1m2l1n3lo_log44lop4l1p2l1r4l1sl4sk4l3t3lu_2lup4l1v2l1w3lyč2l1z2l1č3lęs3lėm3lės2lėč3lį44l1š2l1ž3mas2m1bmb3r2m1c2m1dme3c2m1f2m1g2m1h2m1k2m1l2m1m2m1n2mod2m1pm4plm3pr2m1r4m1s2m1tm3ta3muo2m1v2m1w2m1z2m1č2m1š2m1ž2n1b2n1c2n1dneg42n1f4n1gng3lng4r2n1hni4s2n1j2n1knk3r2n1l2n1m2n1n2n1p2n1r4n1sns3l4n1tn3tanua4nu3gnu1inuk22n1v2n1w2n1z4n1č2n1š2n1žo3dro2eto3krono1o1o2o2ręo3slo3t2oto1oš2vpad23panpap4p2arpas22p1b4p1cper12p1f2p1g2p1h3p2lp3lep3lip3lop4lup4ly2p1m4p1npo4g2p1p2p2rp3rip3ry4prąp3rėp3rū4p1spsi13p4u2p1v2p1w2p1z2p1č4p3ž3ra_ra3b3rac3ral3ram3rav2r1brbo12r1c4r1d2reb3rel5res3ret2r1f2r1g2r1h3ri_ri3d3rij2ril3riori3p3rią3rių2riū4r1k4r1l2r1m4r1n3roj3rok3ron3rop5ros2r1p2r1r4r1srs4p4r1t2ruk3rul4run2rup3rus4r1v2r1w2r1z4rąs4r1č4r1š2r1ž2s3b2s1c2s3d2s1f2s1g2s1hsi3d4sk_4ske5ski5sky4ską5skę2s2l3sles3lis3lo2s1m4smę4s3n3s2ps3pe4spn4spu4s1rs3ri2s1s2s1tst2v4stę4stųs2tūsuž42s1vs2ve2s1w2s1z4s3č2s1š2s1ž2tabta5s2tat2tač2t1b2t1c2t1d2teb3temte4o2t1f4t3g2t1h3tin2tip4t3j4t3kt4kl4t3l4t3m2t1n2tow2t3pt4pjtp4r2t1r4tro4trą4trųtsi14t1ttua43tur2t1vt2vo2t1w3tyd2t1z2t1čt2ėm2t3žu1a2u3aiu2bju1e2ug4ru5inui2ru3klu3kru3kvu5kųu3orup4lurk2u3slu3t2uto12utru1ė2uš2vu1ū2u3žluž3v3va_vap4va3t2v1b2v1c2v1d2vep3ves2v1f2v1g2v1h2v1j2v1k2v1l2v1m2v1n2v1p2v1r2v4s2v1t2v1v2v1w2v1z2v1č4vėp2v1š2v1ž2w1b2w1c2w1d2w1f2w1g2w1h2w1k2w1l2w1m2w1n2w1p2w1r2w1s2w1t2w1v2w1w2w1z2w1č2w1š2w1žy4sk2z1b2z1c2z1d2z1f2z1g2z1h2z1k2z1l2z1m2z1n2z1p2z1r2z1s2z1t2z1v2z1w2z1z2z1č2z1š2z1ž2č1b2č1c2č1d2č1f2č1g2č1hčin12č1k2č1l2č1m2č1n2č1p2č1r2č1s2č1t2č1v2č1w2č1z2č1č2č1š2č1ž2ė3lė3me2ė1sės3l2ė3tė4trį1d2į2k4įsi1įs3lįs2mįs2rįst2į2t2į1ė22š1cš1eiše2v2š1f2š1h2š1lš2lu3šly2š1m2š1nš2nešno1šo2r2š1ršsi1šu4š3š1vš2vi2š1w2š1z4š3čšė2j2š1š2š1žūs3lūs3t2ž1cžen12ž3g2ž1hžio32ž1lž2lu4ž1m2ž1n2ž3p2ž1r2ž1sžsi1ž4skžs3t3ž2vž4viž3vo2ž1w2ž1z2ž1čžįs32ž1š2ž1ž",
|
||||
5 : "_api1_ap3r_as3p_ata1_at3r_aš3v_ek3r_i2š3_iši2_sam1_u2š3a4draag3raa5groa5infai4skai4trak2via2kėta5lo1an4skan4trap3eiap3s2ari4sasi5sas3klas2mias2mua5s4na4stua4truat1ė2atū2žau4klau4skau4slaš4trba4slbe3s2besi1be3t2b2liz3b2loda3b4dema1de4prde4spdis5kdo3ridro2bdu5kadu4sldvi3adžio1e2a3leapi1ed3rie4droed3rėeg3raei4skei4slei2š14entae3o2šep4liepra1epri1eri4sesi5seska1e5skoe3s2veįs3keįt3rfi4s5ge4o13g2nų2g1p2grai2g3ran5g3reg4reig3ro_gyva1ia5g4i3aišid3rėi2dėmie3kli3entie4ski3g2lig3rui3k2nin4klio4pli2p1jip3ruis4lii5s4ni4steisto12i3t2jo4tv5ka3dka4klka4pr3katika3z2ke4b33k2lak4lan4kle_k2lel4klo_4klu_k4raukris2k4rov4k3ru5krunk4s3pku4prkš2lyle4gr5li4olpna1l4s3pmeis1mi4glmi4grmin4smi4trna3s2na3t2ncen1n3drėnei2mne4o3neor2nerė3nesi1ne3slne3t2nk3lan3kryns4kun4s3pnst3rnt4pln3trunu3b2nu4o3nuos2nu5s4olen1om4pro2rieori4sor4tros3leos4loo3s2vpai2lpai2mpai2rpa5srp2at24p3d2pe2reper3spe2rėp3ieš2p3k24pliop4liup4lojpoli1p3raip3rašpris2p5ro_p3romp5s4kp3s2v4p3t2pu4skpu4slpu4tr4p3š2ra1imra3krras4lra3smr3b4r4rein4reitre4p5res3lri3krrisi1ri3strk3lyr3k4r3rod2ro4grr3p4rr3t2rr3t2v3ruosru4sk4rutor3š2lr3š2msala1san5tse4krsi3k4si3p4si3s23s2k2s5ka_5sk4r5skubsk3vask3vi4s3lu4s3lū4sme_so4drs2tals2ten4stins2tods2toj4s3tėsu3blsu3d2su3g2suk2lsu3s2susi13s2vyta3krte3t2ti4gr3toje2tolįto3s2tp3lū4triot2riš4t3s44tvėjt3ėmut3ėmęt3ėmė2t3š2u1i2muk2leuo4slup3rour3klu5ro1ur3s2us3laus3leusva1us3veu3š2lu3š2nuži2muž1ė2ygia1y4k3lčeko1ė4k3l2š1b22š3d22š1g4šiuk12š1k2š2lij2š1p42š3s44š3t2š4vydū4k3lū4s3kžant42ž3b22ž3d22ž3f4ži3mu2ž3k2ž4s5l4ž3t2žu3s2",
|
||||
6 : "_arbi1_arti1_dina1_nusi1_pie2č_sida1_su5kr_te3s2_šven1a5grioan3k2lap1a4kap3i2mar1eitat3augat3i2mau4t3rbalta1ei4k3l2eis3tema5s4en3k2lenk4laere3a44g4rioi3antęi3antėie4d3rie4p5rio4g3rira3s2jauna1kavar1keren1kri5stlg3s2t2m3aidmas3kine3s2tneįs3tnt2ruoparsi1p4s3tyrai4tįri2ma_2r3imtrivin12r3orgrs4ko_rti5k4rti3s22s1amžsarka1senat4si3auksi3a2vsi3a2šsi5š2v5s4ken5s4kle3s2tovst2raitaura1ti4k3ltskri1ug5riou2s1alvi4s5kvi4t3r2vydau2š5istžants5",
|
||||
7 : "_kirti1ate5isteist2rapa4r1ė2pe2r3imprau2sipusiau1rau4ka_viesia1šsikap1",
|
||||
8 : "lap4s3to"
|
||||
}
|
||||
};
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user