rebase on upstream trunk
171
Changelog.yaml
@ -4,6 +4,176 @@
|
||||
# for important features/bug fixes.
|
||||
# Also, each release can have new and improved recipes.
|
||||
|
||||
- version: 0.7.29
|
||||
date: 2010-11-19
|
||||
|
||||
new features:
|
||||
- title: "OSX binary build is now based on Qt 4.7. Also, the build is now Intel only and requires at least OS X 10.5.2. If you are on a PowerPC machine or an older OS X version, do not upgrade"
|
||||
|
||||
- title: "Content server: Allow direct navigation to a set of books in the book list."
|
||||
tickets: [7453]
|
||||
|
||||
- title: "OS X: When deleting books, put the files into the recycle bin instead of deleting them permanently"
|
||||
|
||||
- title: "Add button to easily configure Hotmail as email relay. Also improve usability of easy config buttons"
|
||||
|
||||
- title: "Kobo driver: Support Currently_Reading category"
|
||||
|
||||
- title: "Catalog generation: Thumbnail caching, wishlist, improved description layout."
|
||||
tickets: [7376]
|
||||
|
||||
- title: "Support for the Cybook Orizon"
|
||||
|
||||
bug fixes:
|
||||
- title: "Fix restore to defaults in preferences incorrectly setting PDF unwrap factor to 0.0"
|
||||
|
||||
- title: "PDF Input: Fix unwrapping of accented characters"
|
||||
|
||||
- title: "Do not display dialogs asking for confirmation or showing conversion errors when calibre is minimized to system tray"
|
||||
tickets: [7549]
|
||||
|
||||
- title: "calibre server: Fix regression that broke digest authentication when the calibre interface language was set to non English"
|
||||
|
||||
- title: "EPUB Output: Do not raise an error for invalid embedded fonts in the input document."
|
||||
tickets: [7567]
|
||||
|
||||
- title: "RTF Input: Improved conversion of tables, with support for border styles on table cells"
|
||||
|
||||
- title: "E-book viewer: Fix regression that broke hyphenation. Also add more language patterns for hyphenation"
|
||||
|
||||
- title: "SONY driver: Fix cover thumbnails being uploaded to wrong directory on windows"
|
||||
|
||||
- title: "Fix UnicodeDecodeError when displaying a failed metadata fetch message"
|
||||
tickets: [7560]
|
||||
|
||||
- title: "Bulk metadata edit: Speed up remove all tags operation"
|
||||
|
||||
- title: "MOBI Output: Specify image sizes in pixels instead of em to accommodate Amazon's @#$%#@! MOBI renderer"
|
||||
|
||||
- title: "Fix bug preventing customizing of builtin recipes if they are not ascii encoded"
|
||||
|
||||
- title: "SONY XML cache: Handle case where XML db contains reference to a file that does not exist gracefully"
|
||||
|
||||
improved recipes:
|
||||
- Al Jazeera
|
||||
- The Moscow Times
|
||||
- Globe and Mail
|
||||
- Washington Post
|
||||
|
||||
new recipes:
|
||||
- title: "Hannoversche Allgemeine Zeitung"
|
||||
author: "Artemis"
|
||||
|
||||
- title: "globes.co.il"
|
||||
author: "marbs"
|
||||
|
||||
- title: "THN and RDS"
|
||||
author: "Nexus"
|
||||
|
||||
- title: "pclab.pl"
|
||||
author: "ravcio"
|
||||
|
||||
- title: "Now Toronto"
|
||||
author: "Starson17"
|
||||
|
||||
- title: "Press releases of the German government and EU Commission"
|
||||
author: "malfi"
|
||||
|
||||
|
||||
- version: 0.7.28
|
||||
date: 2010-11-12
|
||||
|
||||
new features:
|
||||
- title: "Update the version of the graphical toolkit (Qt 4.7.1) used in the calibre binary builds on windows and linux. This should result in a significant speed up for the calibre ebook viewer"
|
||||
|
||||
- title: "Driver for Nook Color, Eken M001"
|
||||
|
||||
- title: "Add a tweak to turn off double clicking to open viewer"
|
||||
|
||||
- title: "Catalog generation: Add indication when a book has no formats"
|
||||
tickets: [7376]
|
||||
|
||||
- title: "Advanced search dialog: Add a tab to allow searching particular metadata fields easily"
|
||||
|
||||
- title: "Conversion pipeline: When using the Level x Table of Contents expressions, if a tag is empty but has a non-empty title attribute, use that instead of ignoring the tag"
|
||||
|
||||
bug fixes:
|
||||
- title: "Comic metadata reader: Sort filenames alphabetically when choosing an image for the cover"
|
||||
tickets: [7488]
|
||||
|
||||
- title: "Bulk convert dialog: Hide useless restore defaults button."
|
||||
tickets: [7471]
|
||||
|
||||
- title: "Conversion pipeline: Handle input documents that encode null bytes as HTML entities correctly"
|
||||
tickets: [7355]
|
||||
|
||||
- title: "Fix some SONY readers not being detected on windows"
|
||||
tickets: [7413]
|
||||
|
||||
- title: "MOBI Input: Fix images missing when converting MOBI news downloads created with Mobipocket reader"
|
||||
tickets: [7455]
|
||||
|
||||
- title: "ODT Input: Handle hyperlinks to headings that have truncated destination specifiers correctly"
|
||||
tickets: [7506]
|
||||
|
||||
- title: "Sony driver: Ignore invalid strings when updating XML database"
|
||||
|
||||
- title: "Content Server: Add day to displayed date in /mobile book listing"
|
||||
|
||||
- title: "MOBI Input: Do not generate filenames with only extensions if the MOBI file has no internal name"
|
||||
tickets: [7481]
|
||||
|
||||
- title: "MOBI Input: Handle files that have the record sizes set incorrectly to a long integer"
|
||||
tickets: [7472]
|
||||
|
||||
- title: "Fix not enough vertical space for text in the preferences dialog category listing"
|
||||
|
||||
- title: "Remove 'sort' from Search and replace destination fields and add it to source fields. S&R is no longer marked experimental"
|
||||
|
||||
- title: "Edit metadata dialog: Save dialog geometry on reject as well as on accept"
|
||||
|
||||
- title: "E-book viewer: Fix clicking entries in TOC that point to the currently loaded flow not scrolling view to the top of the document"
|
||||
|
||||
- title: "Fix bug in regex used to extract charset from <meta> tags"
|
||||
|
||||
- title: "MOBI Output: Add support for the <q> tag"
|
||||
|
||||
improved recipes:
|
||||
- Zeit Online
|
||||
- Gamespot Review
|
||||
- Ploitika
|
||||
- Pagina12
|
||||
- Irish Times
|
||||
- elektrolese
|
||||
|
||||
new recipes:
|
||||
- title: "Handelsblatt and European Voice"
|
||||
author: "malfi"
|
||||
|
||||
- title: "Polityka and Newsweek"
|
||||
author: "Mateusz Kielar"
|
||||
|
||||
- title: "MarcTV"
|
||||
author: "Marc Toensings"
|
||||
|
||||
- title: "Rolling Stone"
|
||||
author: "Darko Miletic"
|
||||
|
||||
- title: "Vedomosti"
|
||||
author: "Nikolai Kotchetkov"
|
||||
|
||||
- title: "Hola.com"
|
||||
author: "bmsleight"
|
||||
|
||||
- title: "Dnevnik, Siol.net, MMC-RTV and Avto-magazin"
|
||||
author: "BlonG"
|
||||
|
||||
- title: "SC Print Magazine"
|
||||
author: "Tony Maro"
|
||||
|
||||
- title: "Diario Sport"
|
||||
author: "Jefferson Frantz"
|
||||
|
||||
- version: 0.7.27
|
||||
date: 2010-11-05
|
||||
|
||||
@ -44,6 +214,7 @@
|
||||
tickets: [7356]
|
||||
|
||||
- title: "News download: Workaround lack of thread safety in python mechanize, causing corrupted network packets (degrading network performance) on Ubuntu Maverick 64bit kernels"
|
||||
tickets: [7321]
|
||||
|
||||
- title: "Convert comments to HTML for book details panel in separate thread to make scrolling through the book list faster when large comments are present"
|
||||
|
||||
|
@ -12,8 +12,8 @@ p.title {
|
||||
p.author {
|
||||
margin-top:0em;
|
||||
margin-bottom:0em;
|
||||
text-align: left;
|
||||
text-indent: 1em;
|
||||
text-align: center;
|
||||
text-indent: 0em;
|
||||
font-size:large;
|
||||
}
|
||||
|
||||
@ -27,17 +27,28 @@ p.author_index {
|
||||
}
|
||||
|
||||
p.tags {
|
||||
margin-top:0em;
|
||||
margin-top:0.5em;
|
||||
margin-bottom:0em;
|
||||
text-align: left;
|
||||
text-indent: 1em;
|
||||
font-size:small;
|
||||
text-indent: 0.0in;
|
||||
}
|
||||
|
||||
p.description {
|
||||
text-align:left;
|
||||
font-style:normal;
|
||||
margin-top: 0em;
|
||||
p.formats {
|
||||
font-size:90%;
|
||||
margin-top:0em;
|
||||
margin-bottom:0.5em;
|
||||
text-align: left;
|
||||
text-indent: 0.0in;
|
||||
}
|
||||
|
||||
div.description > p:first-child {
|
||||
margin: 0 0 0 0;
|
||||
text-indent: 0em;
|
||||
}
|
||||
|
||||
div.description {
|
||||
margin: 0 0 0 0;
|
||||
text-indent: 1em;
|
||||
}
|
||||
|
||||
p.date_index {
|
||||
@ -81,6 +92,14 @@ p.unread_book {
|
||||
text-indent:-2em;
|
||||
}
|
||||
|
||||
p.wishlist_item {
|
||||
text-align:left;
|
||||
margin-top:0px;
|
||||
margin-bottom:0px;
|
||||
margin-left:2em;
|
||||
text-indent:-2em;
|
||||
}
|
||||
|
||||
p.date_read {
|
||||
text-align:left;
|
||||
margin-top:0px;
|
||||
@ -104,3 +123,14 @@ hr.annotations_divider {
|
||||
margin-top:0em;
|
||||
margin-bottom:0em;
|
||||
}
|
||||
|
||||
td.publisher, td.date {
|
||||
font-weight:bold;
|
||||
text-align:center;
|
||||
}
|
||||
td.rating {
|
||||
text-align: center;
|
||||
}
|
||||
td.thumbnail img {
|
||||
-webkit-box-shadow: 4px 4px 12px #999;
|
||||
}
|
@ -355,6 +355,25 @@ h2.library_name {
|
||||
color: red;
|
||||
}
|
||||
|
||||
#booklist > #pagelist { display: none; }
|
||||
|
||||
#goto_page_dialog ul {
|
||||
list-style-type: none;
|
||||
font-size: medium;
|
||||
}
|
||||
|
||||
#goto_page_dialog li {
|
||||
margin-bottom: 1.5ex;
|
||||
}
|
||||
|
||||
#goto_page_dialog a {
|
||||
text-decoration: none;
|
||||
color: blue;
|
||||
}
|
||||
|
||||
#goto_page_dialog a:hover {
|
||||
color: red;
|
||||
}
|
||||
|
||||
#booklist .left .ui-button-text {
|
||||
font-size: medium;
|
||||
|
@ -4,7 +4,7 @@
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||
<title>..:: calibre library ::.. {title}</title>
|
||||
<title>..:: calibre {library} ::.. {title}</title>
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=100" />
|
||||
<link rel="icon" type="image/x-icon" href="http://calibre-ebook.com/favicon.ico" />
|
||||
|
||||
@ -41,7 +41,7 @@
|
||||
<div class="area">
|
||||
<div class="bubble">
|
||||
<p><a href="{prefix}/browse" title="Return to top level"
|
||||
>→ home ←</a></p>
|
||||
>→ {home} ←</a></p>
|
||||
</div>
|
||||
</div>
|
||||
<div id="nav-container">
|
||||
@ -80,7 +80,7 @@
|
||||
<form name="search_form" action="{prefix}/browse/search" method="get" accept-charset="UTF-8">
|
||||
<input value="{initial_search}" type="text" title="Search" name="query"
|
||||
class="search_input" />
|
||||
<input type="submit" value="Search" title="Search" alt="Search" />
|
||||
<input type="submit" value="{Search}" title="{Search}" alt="{Search}" />
|
||||
</form>
|
||||
</div>
|
||||
<div> </div>
|
||||
@ -96,5 +96,6 @@
|
||||
</div>
|
||||
</div>
|
||||
<div id="book_details_dialog"></div>
|
||||
<div id="goto_page_dialog"></div>
|
||||
</body>
|
||||
</html>
|
||||
|
@ -202,6 +202,23 @@ function previous_page() {
|
||||
else last_page();
|
||||
}
|
||||
|
||||
// Close the "go to page" dialog, then load the book-list page whose
// element id (inside #booklist) is `id`.
function gp_internal(id) {
    $('#goto_page_dialog').dialog('close');
    var target = $("#booklist #" + id);
    load_page(target);
}
|
||||
|
||||
// Open the "go to page" dialog: copy the markup and title of the
// #pagelist element that is a direct child of #booklist into the dialog,
// size the dialog to the window height minus 100, and show it.
function goto_page() {
    var dialog = $('#goto_page_dialog');
    var pagelist = $('#booklist > #pagelist');

    dialog.html(pagelist.html());
    dialog.dialog('option', 'title', pagelist.attr('title'));
    dialog.dialog('option', 'height', $(window).height() - 100);
    dialog.dialog('open');
}
|
||||
|
||||
function load_page(elem) {
|
||||
if (elem.is(":visible")) return;
|
||||
var ld = elem.find('.load_data');
|
||||
@ -251,6 +268,12 @@ function booklist(hide_sort) {
|
||||
modal: true,
|
||||
show: 'slide'
|
||||
});
|
||||
$("#goto_page_dialog").dialog({
|
||||
autoOpen: false,
|
||||
modal: true,
|
||||
show: 'slide'
|
||||
});
|
||||
|
||||
first_page();
|
||||
}
|
||||
|
||||
|
@ -211,3 +211,9 @@ generate_cover_title_font = None
|
||||
# Absolute path to a TTF font file to use as the font for the footer in the
|
||||
# default cover
|
||||
generate_cover_foot_font = None
|
||||
|
||||
|
||||
# Behavior of doubleclick on the books list. Choices:
|
||||
# open_viewer, do_nothing, edit_cell. Default: open_viewer.
|
||||
# Example: doubleclick_on_library_view = 'do_nothing'
|
||||
doubleclick_on_library_view = 'open_viewer'
|
||||
|
BIN
resources/images/format-text-bold.png
Normal file
After Width: | Height: | Size: 5.0 KiB |
BIN
resources/images/format-text-italic.png
Normal file
After Width: | Height: | Size: 4.1 KiB |
BIN
resources/images/format-text-strikethrough.png
Normal file
After Width: | Height: | Size: 5.9 KiB |
BIN
resources/images/format-text-underline.png
Normal file
After Width: | Height: | Size: 4.4 KiB |
BIN
resources/images/hotmail.png
Normal file
After Width: | Height: | Size: 2.6 KiB |
BIN
resources/images/news/avto-magazin.png
Normal file
After Width: | Height: | Size: 1.4 KiB |
BIN
resources/images/news/dnevnik.png
Normal file
After Width: | Height: | Size: 861 B |
BIN
resources/images/news/moscow_times.png
Normal file
After Width: | Height: | Size: 1.0 KiB |
BIN
resources/images/news/rollingstone.png
Normal file
After Width: | Height: | Size: 1.3 KiB |
BIN
resources/images/news/siol.png
Normal file
After Width: | Height: | Size: 423 B |
50
resources/recipes/180.recipe
Normal file
@ -0,0 +1,50 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
|
||||
'''
|
||||
180.com.uy
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Noticias(BasicNewsRecipe):
    '''News recipe for 180.com.uy, driven by the site's "Titulares" feed.'''

    # Identification / metadata
    title = '180.com.uy'
    __author__ = 'Gustavo Azambuja'
    description = 'Noticias de Uruguay'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'

    # Download behaviour
    encoding = 'utf-8'
    recursion = 5
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_javascript = True
    no_stylesheets = True

    # Content filtering
    keep_only_tags = [
        {'name': 'div', 'attrs': {'class': 'tef-md tef-md-seccion-sociedad'}},
    ]
    remove_tags = [
        {'name': ['object', 'link']},
    ]
    remove_attributes = ['width', 'height', 'style', 'font', 'color']

    extra_css = '''
        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
        p {font-family:Arial,Helvetica,sans-serif;}
        '''

    feeds = [
        (u'Titulares', u'http://www.180.com.uy/feed.php'),
    ]

    def get_cover_url(self):
        '''Return the fixed URL of the site logo, used as the cover.'''
        return 'http://www.180.com.uy/tplef/img/logo.gif'

    def preprocess_html(self, soup):
        '''Remove the style attribute from every tag that has one; return soup.'''
        styled_tags = soup.findAll(style=True)
        for tag in styled_tags:
            del tag['style']
        return soup
|
||||
|
@ -1,10 +1,8 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
|
||||
'''
|
||||
aljazeera.net
|
||||
english.aljazeera.net
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
@ -12,41 +10,59 @@ class AlJazeera(BasicNewsRecipe):
|
||||
title = 'Al Jazeera in English'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'News from Middle East'
|
||||
language = 'en'
|
||||
|
||||
language = 'en'
|
||||
publisher = 'Al Jazeera'
|
||||
category = 'news, politics, middle east'
|
||||
simultaneous_downloads = 1
|
||||
delay = 4
|
||||
oldest_article = 1
|
||||
delay = 1
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
encoding = 'iso-8859-1'
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
extra_css = """
|
||||
body{font-family: Arial,sans-serif}
|
||||
#ctl00_cphBody_dvSummary{font-weight: bold}
|
||||
#dvArticleDate{font-size: small; color: #999999}
|
||||
"""
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment', description
|
||||
, '--category', category
|
||||
, '--publisher', publisher
|
||||
, '--ignore-tables'
|
||||
]
|
||||
|
||||
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_table=True'
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'ctl00_divContent'})]
|
||||
keep_only_tags = [
|
||||
dict(attrs={'id':['DetailedTitle','ctl00_cphBody_dvSummary','dvArticleDate']})
|
||||
,dict(name='td',attrs={'class':'DetailedSummary'})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name=['object','link'])
|
||||
dict(name=['object','link','table','meta','base','iframe','embed'])
|
||||
,dict(name='td', attrs={'class':['MostActiveDescHeader','MostActiveDescBody']})
|
||||
]
|
||||
|
||||
feeds = [(u'AL JAZEERA ENGLISH (AJE)', u'http://english.aljazeera.net/Services/Rss/?PostingId=2007731105943979989' )]
|
||||
|
||||
def get_article_url(self, article):
|
||||
artlurl = article.get('link', None)
|
||||
return artlurl.replace('http://english.aljazeera.net//','http://english.aljazeera.net/')
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
for item in soup.findAll(face=True):
|
||||
del item['face']
|
||||
td = soup.find('td',attrs={'class':'DetailedSummary'})
|
||||
if td:
|
||||
td.name = 'div'
|
||||
spn = soup.find('span',attrs={'id':'DetailedTitle'})
|
||||
if spn:
|
||||
spn.name='h1'
|
||||
for itm in soup.findAll('span', attrs={'id':['dvArticleDate','ctl00_cphBody_lblDate']}):
|
||||
itm.name = 'div'
|
||||
for alink in soup.findAll('a'):
|
||||
if alink.string is not None:
|
||||
tstr = alink.string
|
||||
alink.replaceWith(tstr)
|
||||
return soup
|
||||
|
||||
|
47
resources/recipes/avto-magazin.recipe
Normal file
@ -0,0 +1,47 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, BlonG'
|
||||
'''
|
||||
avto-magazin.si
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class Dnevnik(BasicNewsRecipe):
    '''News recipe for Avto Magazin (avto-magazin.si), fed from its RSS feed.

    NOTE(review): the class name looks copied from the dnevnik.si recipe; it
    is left unchanged so anything referring to it by name keeps working.
    '''

    title = u'Avto Magazin'
    __author__ = u'BlonG'
    # Use real code points (\u010d = c-caron, \u0161 = s-caron). The previous
    # literal put UTF-8 *byte* escapes (\xc4\x8d, \xc5\xa1) inside a unicode
    # string, which yields four unrelated characters instead of two letters.
    description = u'Za avtomobilisti\u010dne frike, poznavalce in nedeljske \u0161oferje.'
    oldest_article = 7
    max_articles_per_feed = 20
    # The misspelled, unused duplicate attribute ``labguage`` was dropped;
    # ``language`` is the single place the language code is set.
    language = 'sl'
    no_stylesheets = True
    use_embedded_content = False

    conversion_options = {'linearize_tables' : True}

    cover_url = 'https://sites.google.com/site/javno2010/home/avto_magazin_cover.jpg'

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
        '''

    keep_only_tags = [
        dict(name='div', attrs={'id':'_iprom_inStream'}),
#        dict(name='div', attrs={'class':'entry-content'}),
        ]

    remove_tags = [
        dict(name='div', attrs={'id':'voteConfirmation'}),
        dict(name='div', attrs={'id':'InsideVote'}),
        dict(name='div', attrs={'class':'Zone234'}),
        dict(name='div', attrs={'class':'Comments'}),
        dict(name='div', attrs={'class':'sorodneNovice'}),
        dict(name='div', attrs={'id':'footer'}),
        ]

    feeds = [
        (u'Novice', u'http://www.avto-magazin.si/rss/')
        ]
|
58
resources/recipes/bitacora.recipe
Normal file
@ -0,0 +1,58 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
|
||||
'''
|
||||
bitacora.com.uy
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class General(BasicNewsRecipe):
    '''News recipe for bitacora.com.uy, driven by the site's "Titulares" feed.'''

    # Identification / metadata
    title = 'bitacora.com.uy'
    __author__ = 'Gustavo Azambuja'
    description = 'Noticias de Uruguay'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'

    # Download behaviour
    encoding = 'iso-8859-1'
    recursion = 5
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_javascript = True
    no_stylesheets = True

    # Content filtering
    keep_only_tags = [{'id': ['txt']}]
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'tablafoot'}},
        {'name': ['object', 'h4']},
        {'name': ['object', 'link']},
    ]
    remove_attributes = ['width', 'height', 'style', 'font', 'color']

    extra_css = '''
        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
        p {font-family:Arial,Helvetica,sans-serif;}
        '''

    feeds = [
        (u'Titulares', u'http://www.bitacora.com.uy/anxml.cgi?15'),
    ]

    def get_cover_url(self):
        '''Build the cover URL from the front page's img tag of class
        "imgtapa"; return None when no such tag is found.'''
        front_page = self.index_to_soup('http://www.bitacora.com.uy')
        cover_img = front_page.find('img', attrs={'class': 'imgtapa'})
        if not cover_img:
            return None
        return "http://www.bitacora.com.uy/" + cover_img['src']

    def preprocess_html(self, soup):
        '''Remove the style attribute from every tag that has one; return soup.'''
        styled_tags = soup.findAll(style=True)
        for tag in styled_tags:
            del tag['style']
        return soup
|
||||
|
@ -1,18 +1,22 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# -*- coding: utf-8 mode: python -*-
|
||||
|
||||
# Find the newest version of this recipe here:
|
||||
# https://github.com/consti/BrandEins-Recipe/raw/master/brandeins.recipe
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Constantin Hofstetter <consti at consti.de>'
|
||||
__version__ = '0.95'
|
||||
__copyright__ = '2010, Constantin Hofstetter <consti at consti.de>, Steffen Siebert <calibre at steffensiebert.de>'
|
||||
__version__ = '0.96'
|
||||
|
||||
''' http://brandeins.de - Wirtschaftsmagazin '''
|
||||
import re
|
||||
import string
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
class BrandEins(BasicNewsRecipe):
|
||||
|
||||
title = u'Brand Eins'
|
||||
title = u'brand eins'
|
||||
__author__ = 'Constantin Hofstetter'
|
||||
description = u'Wirtschaftsmagazin'
|
||||
publisher ='brandeins.de'
|
||||
@ -22,11 +26,14 @@ class BrandEins(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
encoding = 'utf-8'
|
||||
language = 'de'
|
||||
publication_type = 'magazine'
|
||||
needs_subscription = True
|
||||
|
||||
# 2 is the last full magazine (default)
|
||||
# 1 is the newest (but not full)
|
||||
# 3 is one before 2 etc.
|
||||
which_ausgabe = 2
|
||||
# This value can be set via the username field.
|
||||
default_issue = 2
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'theContent'}), dict(name='div', attrs={'id':'sidebar'}), dict(name='div', attrs={'class':'intro'}), dict(name='p', attrs={'class':'bodytext'}), dict(name='div', attrs={'class':'single_image'})]
|
||||
|
||||
@ -61,17 +68,31 @@ class BrandEins(BasicNewsRecipe):
|
||||
|
||||
return soup
|
||||
|
||||
def get_cover(self, soup):
|
||||
cover_url = None
|
||||
cover_item = soup.find('div', attrs = {'class': 'cover_image'})
|
||||
if cover_item:
|
||||
cover_url = 'http://www.brandeins.de/' + cover_item.img['src']
|
||||
return cover_url
|
||||
|
||||
def parse_index(self):
|
||||
feeds = []
|
||||
|
||||
archive = "http://www.brandeins.de/archiv.html"
|
||||
|
||||
issue = self.default_issue
|
||||
if self.username:
|
||||
try:
|
||||
issue = int(self.username)
|
||||
except:
|
||||
pass
|
||||
|
||||
soup = self.index_to_soup(archive)
|
||||
latest_jahrgang = soup.findAll('div', attrs={'class': re.compile(r'\bjahrgang-latest\b') })[0].findAll('ul')[0]
|
||||
pre_latest_issue = latest_jahrgang.findAll('a')[len(latest_jahrgang.findAll('a'))-self.which_ausgabe]
|
||||
pre_latest_issue = latest_jahrgang.findAll('a')[len(latest_jahrgang.findAll('a'))-issue]
|
||||
url = pre_latest_issue.get('href', False)
|
||||
# Get the title for the magazin - build it out of the title of the cover - take the issue and year;
|
||||
self.title = "Brand Eins "+ re.search(r"(?P<date>\d\d\/\d\d\d\d+)", pre_latest_issue.find('img').get('title', False)).group('date')
|
||||
# Get month and year of the magazine issue - build it out of the title of the cover
|
||||
self.timefmt = " " + re.search(r"(?P<date>\d\d\/\d\d\d\d)", pre_latest_issue.find('img').get('title', False)).group('date')
|
||||
url = 'http://brandeins.de/'+url
|
||||
|
||||
# url = "http://www.brandeins.de/archiv/magazin/tierisch.html"
|
||||
@ -83,6 +104,7 @@ class BrandEins(BasicNewsRecipe):
|
||||
|
||||
def brand_eins_parse_latest_issue(self, url):
|
||||
soup = self.index_to_soup(url)
|
||||
self.cover_url = self.get_cover(soup)
|
||||
article_lists = [soup.find('div', attrs={'class':'subColumnLeft articleList'}), soup.find('div', attrs={'class':'subColumnRight articleList'})]
|
||||
|
||||
titles_and_articles = []
|
||||
@ -123,3 +145,4 @@ class BrandEins(BasicNewsRecipe):
|
||||
current_articles.append({'title': title, 'url': url, 'description': description, 'date':''})
|
||||
titles_and_articles.append([chapter_title, current_articles])
|
||||
return titles_and_articles
|
||||
|
||||
|
@ -1,9 +1,11 @@
|
||||
import re;
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class CNetJapan(BasicNewsRecipe):
|
||||
title = u'CNET Japan'
|
||||
oldest_article = 3
|
||||
max_articles_per_feed = 30
|
||||
__author__ = 'Hiroshi Miura'
|
||||
|
||||
feeds = [(u'cnet rss', u'http://feeds.japan.cnet.com/cnet/rss')]
|
||||
language = 'ja'
|
||||
|
69
resources/recipes/cosmopolitan.recipe
Normal file
@ -0,0 +1,69 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
|
||||
'''
|
||||
Cosmopolitan (cosmohispano.com)
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class General(BasicNewsRecipe):
    '''News recipe for the Spanish edition of Cosmopolitan (cosmohispano.com).'''

    title = 'Cosmopolitan'
    __author__ = 'Gustavo Azambuja'
    description = 'Revista Cosmopolitan, Edicion Espanola'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    recursion = 1
    encoding = 'utf8'
    remove_javascript = True
    no_stylesheets = True
    conversion_options = {'linearize_tables': True}

    oldest_article = 180
    max_articles_per_feed = 100
    keep_only_tags = [
        dict(id=['contenido']),
        dict(name='td', attrs={'class':['contentheading', 'txt_articulo']})
        ]
    remove_tags = [
        dict(name='div', attrs={'class':['breadcrumb', 'bloque1', 'article', 'bajo_title', 'tags_articles', 'otrosenlaces_title', 'otrosenlaces_parent', 'compartir']}),
        dict(name='div', attrs={'id':'comment'}),
        dict(name='table', attrs={'class':'pagenav'}),
        dict(name=['object','link'])
        ]
    remove_attributes = ['width','height', 'style', 'font', 'color']

    extra_css = '''
        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
        img {float:left; clear:both; margin:10px}
        p {font-family:Arial,Helvetica,sans-serif;}
        '''
    feeds = [
        (u'Articulos', u'http://feeds.feedburner.com/cosmohispano/FSSt')
        ]

    def preprocess_html(self, soup):
        '''Turn every table-related tag in the body into a plain div and
        strip its layout attributes; return the modified soup.'''
        attribs = [ 'style','font','valign'
                    ,'colspan','width','height'
                    ,'rowspan','summary','align'
                    ,'cellspacing','cellpadding'
                    ,'frames','rules','border'
                  ]
        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
            item.name = 'div'
            for attrib in attribs:
                if item.has_key(attrib):
                    del item[attrib]
        return soup

    def get_cover_url(self):
        '''Build the cover URL from the img tag of class "img_portada" on
        the magazine page; return None when no such tag is found.'''
        # Initialise first: without this, a page lacking the cover image
        # made the final return raise UnboundLocalError.
        cover_url = None
        index = 'http://www.cosmohispano.com/revista'
        soup = self.index_to_soup(index)
        link_item = soup.find('img',attrs={'class':'img_portada'})
        if link_item:
            cover_url = "http://www.cosmohispano.com"+link_item['src']
        return cover_url
|
@ -25,7 +25,7 @@ class Danas(BasicNewsRecipe):
|
||||
remove_empty_feeds = True
|
||||
extra_css = """ @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
|
||||
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
|
||||
.article_description,body,.lokacija{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif}
|
||||
.article,.articledescription,body,.lokacija,.feed{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif}
|
||||
.nadNaslov,h1,.preamble{font-family: Georgia,"Times New Roman",Times,serif1,serif}
|
||||
.antrfileText{border-left: 2px solid #999999;
|
||||
margin-left: 0.8em;
|
||||
@ -66,7 +66,7 @@ class Danas(BasicNewsRecipe):
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'left'})]
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':['width_1_4','metaClanka','baner']})
|
||||
dict(name='div', attrs={'class':['width_1_4','metaClanka','baner','listaVesti','article_nav']})
|
||||
,dict(name='div', attrs={'id':'comments'})
|
||||
,dict(name=['object','link','iframe','meta'])
|
||||
]
|
||||
|
61
resources/recipes/deredactie.recipe
Normal file
@ -0,0 +1,61 @@
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class deredactie(BasicNewsRecipe):
    '''News recipe for the German-language pages of deredactie.be.

    The index is built by hand in parse_index() rather than from feeds.
    '''

    title = u'Deredactie.be'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://www.deredactie.be/polopoly_fs/1.510827!image/2710428628.gif'
    language = 'de'
    __author__ = 'malfi'

    keep_only_tags = [
        dict(name = 'div', attrs = {'id': 'articlehead'}),
        dict(name = 'div', attrs = {'id': 'articlebody'}),
        ]
    remove_tags = [
        dict(name = 'div', attrs = {'id': 'story'}),
        dict(name = 'div', attrs = {'id': 'useractions'}),
        dict(name = 'hr'),
        ]

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
        '''

    def parse_index(self):
        '''Return a list of (category title, articles) tuples.

        Categories come from the <li id="navItem2".."navItem9..."> entries of
        the start page; each category page is then scanned for article links.
        Each article is a dict with 'title', 'url', 'description' and 'date'
        keys. Categories without articles are omitted.
        '''
        categories = []
        catnames = {}
        soup = self.index_to_soup('http://www.deredactie.be/cm/vrtnieuws.deutsch')
        for elem in soup.findAll('li', attrs={'id' : re.compile("^navItem[2-9]") }):
            a = elem.find('a', href=True)
            if a is None:
                # Navigation entry without a link: nothing to follow.
                continue
            # The category id is the last path segment of the link.
            m = re.search('(?<=/)[^/]*$', a['href'])
            if m is None:
                # href contains no '/', so no category can be derived.
                continue
            cat = str(m.group(0))
            categories.append(cat)
            # Fall back to the category id when the link has no title.
            catnames[cat] = a.get('title', cat)
            self.log("found cat %s\n" % catnames[cat])

        feeds = []

        for cat in categories:
            articles = []
            seen_urls = set()  # URLs already added for this category
            soup = self.index_to_soup('http://www.deredactie.be/cm/vrtnieuws.deutsch/'+cat)
            for a in soup.findAll('a',attrs={'href' : re.compile("deutsch.*/[0-9][0-9][0-9][0-9][0-9][0-9]_")}):
                url = a['href'].strip()
                if url.startswith('/'):
                    url = 'http://www.deredactie.be' + url
                if url in seen_urls:
                    self.log("SKIPPING DUP %s" % url)
                    continue
                seen_urls.add(url)
                articles.append({'title':self.tag_to_string(a), 'url':url, 'description':'', 'date':''})
                self.log("Adding URL %s\n" %url)
            if articles:
                feeds.append((catnames[cat], articles))
        return feeds
|
||||
|
42
resources/recipes/diario_sport.recipe
Normal file
@ -0,0 +1,42 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class DiarioSport(BasicNewsRecipe):
    """Recipe for the Spanish sports daily Diario Sport (single RSS feed)."""

    # --- metadata -----------------------------------------------------------
    title = u'Diario Sport'
    __author__ = 'Jefferson Frantz'
    description = 'Todas las noticias del Barça y del mundo del deporte en general'
    language = 'es'
    timefmt = ' [%d %b, %Y]'

    # --- download limits ----------------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 75
    no_stylesheets = True

    feeds = [
        (u'Sport', u'http://feeds.feedburner.com/sport/ultimahora'),
    ]

    extra_css = (
        '\n'
        'h2{font-family: serif; font-size: small; font-weight: bold; color: #000000; text-align: justify}\n'
    )

    # Only the central news container is kept ...
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': ['noticiasMedio']}},
    ]

    # ... and these elements are stripped from it.
    remove_tags = [
        {'name': ['object', 'link', 'script', 'ul']},
        {'name': 'div', 'attrs': {'id': [
            'scrAdSense', 'herramientas2', 'participacion', 'participacion2',
            'bloque1resultados', 'bloque2resultados', 'cont_vinyetesAnt',
            'tinta', 'noticiasSuperior', 'cintillopublicidad2',
        ]}},
        {'name': 'p', 'attrs': {'class': ['masinformacion', 'hora']}},
        {'name': 'a', 'attrs': {'class': ["'link'"]}},
        {'name': 'div', 'attrs': {'class': [
            'addthis_toolbox addthis_default_style', 'firma', 'pretitularnoticia',
        ]}},
        {'name': 'form', 'attrs': {'id': ['formularioDeBusquedaAvanzada']}},
    ]

    def preprocess_html(self, soup):
        """Drop every inline ``style`` attribute and return the soup."""
        styled_tags = soup.findAll(style=True)
        for tag in styled_tags:
            del tag['style']
        return soup

    def postprocess_html(self, soup, first_fetch):
        """Remove the video mask overlay image, if the page contains one."""
        mask = soup.find('img', src='/img/videos/mascaravideo.png')
        if mask is not None:
            mask.extract()
        return soup
|
||||
|
63
resources/recipes/dnevnik.recipe
Normal file
@ -0,0 +1,63 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, BlonG'
|
||||
'''
|
||||
dnevnik.si
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class Dnevnik(BasicNewsRecipe):
    """Recipe for the Slovenian daily dnevnik.si, built from its RSS feeds."""

    title = u'Dnevnik.si'
    __author__ = u'BlonG'
    description = (
        u'Dnevnik je \u010dasnik z ve\u010d kot polstoletno zgodovino.\n'
        u'Pod sloganom \xbb\u017divljenje ima besedo\xab na svojih straneh prina\u0161a\n'
        u'bralcem bogastvo informacij, komentarjev in kolumen in raznovrstnost\n'
        u'pogledov, zaznamovanih z odgovornostjo do posameznika in \u0161ir\u0161e\n'
        u'dru\u017ebe.'
    )
    language = 'sl'
    cover_url = 'https://sites.google.com/site/javno2010/home/dnevnik_cover.jpg'

    oldest_article = 3
    max_articles_per_feed = 20
    use_embedded_content = False
    no_stylesheets = True

    extra_css = (
        '\n'
        'h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}\n'
        'h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}\n'
        'p{font-family:Arial,Helvetica,sans-serif;font-size:small;}\n'
        'body{font-family:Helvetica,Arial,sans-serif;font-size:small;}\n'
    )

    # Containers that are kept from each article page.
    keep_only_tags = [
        {'name': 'div', 'attrs': {'id': '_iprom_inStream'}},
        {'name': 'div', 'attrs': {'class': 'entry-content'}},
    ]

    # Elements stripped from the kept containers.
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'fb_article_top'}},
        {'name': 'div', 'attrs': {'class': 'related'}},
        {'name': 'div', 'attrs': {'class': 'fb_article_foot'}},
        {'name': 'div', 'attrs': {'class': 'spreading'}},
        {'name': 'dl', 'attrs': {'class': 'ad'}},
        {'name': 'p', 'attrs': {'class': 'report'}},
        {'name': 'div', 'attrs': {'class': 'hfeed comments'}},
        {'name': 'dl', 'attrs': {'id': 'entryPanel'}},
        {'name': 'dl', 'attrs': {'class': 'infopush ip_wide'}},
        {'name': 'div', 'attrs': {'class': 'sidebar'}},
        {'name': 'dl', 'attrs': {'class': 'bottom'}},
        {'name': 'div', 'attrs': {'id': 'footer'}},
    ]

    # One feed per site section; the section is selected by articleSection.
    feeds = [
        (u'Slovenija', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=13'),
        (u'Svet', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=14'),
        (u'EU', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=116'),
        (u'Poslovni dnevnik', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=5'),
        (u'Kronika', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=15'),
        (u'Kultura', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=17'),
        (u'Zdravje', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=18'),
        (u'Znanost in IT', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=19'),
        (u'(Ne)verjetno', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=20'),
        (u'E-strada', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=21'),
        (u'Svet vozil', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=22'),
    ]
|
67
resources/recipes/el_pais_uy.recipe
Normal file
@ -0,0 +1,67 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
|
||||
'''
|
||||
http://www.elpais.com.uy/
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class General(BasicNewsRecipe):
    """Recipe for the Uruguayan daily El Pais (elpais.com.uy), RSS based."""

    title = 'Diario El Pais'
    __author__ = 'Gustavo Azambuja'
    description = 'Noticias | Uruguay'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    recursion = 2
    encoding = 'iso-8859-1'
    remove_javascript = True
    no_stylesheets = True

    oldest_article = 2
    max_articles_per_feed = 100
    keep_only_tags = [
        dict(name='h1'),
        dict(name='div', attrs={'id':'Contenido'})
    ]
    remove_tags = [
        dict(name='div', attrs={'class':['date_text', 'comments', 'form_section', 'share_it']}),
        dict(name='div', attrs={'id':['relatedPosts', 'spacer', 'banner_izquierda', 'right_container']}),
        dict(name='p', attrs={'class':'FacebookLikeButton'}),
        # 'object' used to be listed in two separate entries; one is enough.
        dict(name=['object', 'form', 'table']),
    ]

    extra_css = (
        '\n'
        'h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}\n'
        'h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}\n'
        'h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}\n'
        'p {font-family:Arial,Helvetica,sans-serif;}\n'
    )
    feeds = [
        (u'Ultimo Momento', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=umomento'),
        (u'Editorial', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=editorial'),
        (u'Nacional', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=nacional'),
        (u'Internacional', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=internacional'),
        (u'Espectaculos', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=espectaculos'),
        (u'Deportes', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=deportes'),
        (u'Ciudades', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=ciudades'),
        (u'Economia', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=economia')
    ]

    def get_cover_url(self):
        """Return the cover image URL scraped from the front page.

        The image is taken from the first ``div`` with class
        ``boxmedio box257``.  Returns ``None`` when that box, its ``img`` or
        the image's ``src`` attribute is missing.
        """
        index = 'http://www.elpais.com.uy'
        soup = self.index_to_soup(index)
        link_item = soup.find('div', attrs={'class':'boxmedio box257'})
        if link_item is None or link_item.img is None:
            return None
        src = link_item.img.get('src')
        if not src:
            return None
        # The original concatenates site root and src directly, so src is
        # presumably site-relative -- TODO confirm against the live page.
        return index + src

    def preprocess_html(self, soup):
        """Strip every inline ``style`` attribute and return the soup."""
        for item in soup.findAll(style=True):
            del item['style']
        return soup
|
||||
|
@ -10,6 +10,8 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class EndgadgetJapan(BasicNewsRecipe):
|
||||
title = u'Endgadget\u65e5\u672c\u7248'
|
||||
language = 'ja'
|
||||
__author__ = 'Hiroshi Miura'
|
||||
cover_url = 'http://skins18.wincustomize.com/1/49/149320/29/7578/preview-29-7578.jpg'
|
||||
masthead_url = 'http://www.blogsmithmedia.com/japanese.engadget.com/media/eng-jp-logo-t.png'
|
||||
oldest_article = 7
|
||||
|
58
resources/recipes/eu_commission.recipe
Normal file
@ -0,0 +1,58 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
# Language code appended to every feed URL built by feedlink().
LANGUAGE = 'de'


def feedlink(num):
    """Return the europa.eu "rapid" RSS URL for the feed with id ``num``."""
    pieces = [
        u'http://europa.eu/rapid/syndication/QuickRSSAction.do?id=',
        str(num),
        '&lang=',
        LANGUAGE,
    ]
    return u''.join(pieces)
|
||||
|
||||
class EUCommissionPress(BasicNewsRecipe):
    """Recipe for EU Commission press releases, one RSS feed per policy area.

    Feed URLs are produced by ``feedlink`` from the numeric feed ids below.
    """

    title = u'Pressemitteilungen der EU Kommission pro Politikbereich'
    __author__ = 'malfi'
    language = LANGUAGE
    cover_url = 'http://ec.europa.eu/wel/template_2007/images/banners/banner-background.jpg'

    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True

    # Keep only the main press-release container; nothing extra is removed.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'pressReleaseContentMain'}),
    ]
    remove_tags = []

    feeds = [
        (u'Pressemitteilung des Tages', feedlink(64)),
        (u'Presidency', feedlink(137)),
        (u'Foreign affairs and security policy', feedlink(138)),
        (u'Agriculture and rural development', feedlink(139)),
        (u'Budget and financial programming ', feedlink(140)),
        (u'Climate action', feedlink(141)),
        (u'Competition', feedlink(142)),
        (u'Development', feedlink(143)),
        (u'Digital agenda', feedlink(144)),
        (u'Economic and monetary affairs', feedlink(145)),
        (u'Education, culture, multilingualism and youth ', feedlink(146)),
        (u'Employment, social Affairs and inclusion ', feedlink(147)),
        (u'Energy', feedlink(148)),
        (u'Enlargment and European neighbourhood policy ', feedlink(149)),
        (u'Environment', feedlink(150)),
        (u'Health and consumer policy', feedlink(151)),
        (u'Home affairs', feedlink(152)),
        (u'Industry and entrepreneurship', feedlink(153)),
        (u'Inter-Institutional relations and administration', feedlink(154)),
        (u'Internal market and services', feedlink(155)),
        (u'International cooperation, humanitarian aid and crisis response', feedlink(156)),
        (u'Justice, fundamental rights and citizenship', feedlink(157)),
        (u'Maritime affairs and fisheries', feedlink(158)),
        (u'Regional policy', feedlink(159)),
        (u'Research and innovation', feedlink(160)),
        (u'Taxation and customs union, audit and anti-fraud', feedlink(161)),
        (u'Trade', feedlink(162)),
        (u'Transport', feedlink(163)),
    ]

    extra_css = (
        '\n'
        'h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}\n'
        'h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}\n'
        'p{font-family:Arial,Helvetica,sans-serif;font-size:small;}\n'
        'body{font-family:Helvetica,Arial,sans-serif;font-size:small;}\n'
    )
|
||||
|
51
resources/recipes/european_voice.recipe
Normal file
@ -0,0 +1,51 @@
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class EuropeanVoice(BasicNewsRecipe):
    """Recipe for European Voice; articles are fetched in their print view."""

    title = u'European Voice'
    __author__ = 'malfi'
    language = 'en'
    cover_url = 'http://www.europeanvoice.com/Css/images/logo.gif'

    oldest_article = 14
    max_articles_per_feed = 100
    no_stylesheets = True

    keep_only_tags = [{'name': 'div', 'attrs': {'id': 'articleLeftColumn'}}]
    remove_tags = [{'name': 'div', 'attrs': {'id': 'BreadCrump'}}]

    feeds = [
        (u'Whole site ', u'http://www.europeanvoice.com/Rss/2.xml'),
        (u'News and analysis', u'http://www.europeanvoice.com/Rss/6.xml'),
        (u'Comment', u'http://www.europeanvoice.com/Rss/7.xml'),
        (u'Special reports', u'http://www.europeanvoice.com/Rss/5.xml'),
        (u'People', u'http://www.europeanvoice.com/Rss/8.xml'),
        (u'Career', u'http://www.europeanvoice.com/Rss/11.xml'),
        (u'Policies', u'http://www.europeanvoice.com/Rss/4.xml'),
        (u'EVents', u'http://www.europeanvoice.com/Rss/10.xml'),
        (u'Policies - Economics', u'http://www.europeanvoice.com/Rss/31.xml'),
        (u'Policies - Business', u'http://www.europeanvoice.com/Rss/19.xml'),
        (u'Policies - Trade', u'http://www.europeanvoice.com/Rss/25.xml'),
        (u'Policies - Information society', u'http://www.europeanvoice.com/Rss/20.xml'),
        (u'Policies - Energy', u'http://www.europeanvoice.com/Rss/15.xml'),
        (u'Policies - Transport', u'http://www.europeanvoice.com/Rss/18.xml'),
        (u'Policies - Climate change', u'http://www.europeanvoice.com/Rss/16.xml'),
        (u'Policies - Environment', u'http://www.europeanvoice.com/Rss/17.xml'),
        (u'Policies - Farming & food', u'http://www.europeanvoice.com/Rss/23.xml'),
        (u'Policies - Health & society', u'http://www.europeanvoice.com/Rss/24.xml'),
        (u'Policies - Justice', u'http://www.europeanvoice.com/Rss/29.xml'),
        (u'Policies - Foreign affairs', u'http://www.europeanvoice.com/Rss/27.xml'),
    ]

    extra_css = (
        '\n'
        'h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}\n'
        'h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}\n'
        'p{font-family:Arial,Helvetica,sans-serif;font-size:small;}\n'
        'body{font-family:Helvetica,Arial,sans-serif;font-size:small;}\n'
    )

    def print_version(self, url):
        """Return ``url`` with the print-view query string appended."""
        print_suffix = '?bPrint=1'
        return url + print_suffix

    def preprocess_html(self, soup):
        """Raise if the page contains the text 'Subscribers', else pass through.

        Raises:
            Exception: when any node with the exact text 'Subscribers' is
                found in the page.
        """
        if soup.findAll(True, text='Subscribers'):
            raise Exception('Article skipped, because content can only be seen with subscription')
        return soup
|
||||
|
100
resources/recipes/freeway.recipe
Normal file
@ -0,0 +1,100 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
|
||||
'''
|
||||
http://freeway.com.uy
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class General(BasicNewsRecipe):
    """Recipe for the magazine freeway.com.uy, scraped from its article index."""

    title = 'freeway.com.uy'
    __author__ = 'Gustavo Azambuja'
    description = 'Revista Freeway, Montevideo, Uruguay'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    recursion = 1
    encoding = 'utf8'
    remove_javascript = True
    no_stylesheets = True
    conversion_options = {'linearize_tables': True}

    oldest_article = 180
    max_articles_per_feed = 100
    keep_only_tags = [
        dict(id=['contenido']),
        dict(name='a', attrs={'class':'titulo_art_ppal'}),
        dict(name='img', attrs={'class':'recuadro'}),
        dict(name='td', attrs={'class':'txt_art_ppal'})
    ]
    remove_tags = [
        dict(name=['object','link'])
    ]
    remove_attributes = ['width','height', 'style', 'font', 'color']

    extra_css = (
        '\n'
        'h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}\n'
        'h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}\n'
        'h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}\n'
        'img {float:left; clear:both; margin:10px}\n'
        'p {font-family:Arial,Helvetica,sans-serif;}\n'
    )

    def parse_index(self):
        """Return ``[(section title, articles)]`` for every non-empty section."""
        feeds = []
        for title, url in [('Articulos', 'http://freeway.com.uy/revista/')]:
            articles = self.art_parse_section(url)
            if articles:
                feeds.append((title, articles))
        return feeds

    def art_parse_section(self, url):
        """Collect the articles listed on the index page at ``url``.

        Scans every element with class ``ancho_articulos`` that follows the
        element with id ``tbl_1`` and reads title, link and teaser text from
        each table cell.  Returns a list of article dicts with ``title``,
        ``url``, ``description`` and ``date`` keys; the list is empty when
        the ``tbl_1`` anchor element is not present.
        """
        soup = self.index_to_soup(url)
        div = soup.find(attrs={'id': 'tbl_1'})
        if div is None:
            # Page layout changed or fetch returned something unexpected.
            self.log('\t\tNo article table found on', url)
            return []

        current_articles = []
        for tag in div.findAllNext(attrs = {'class': 'ancho_articulos'}):
            # NOTE(review): kept from the original; looks like a stop marker
            # that the class filter above may never let through -- confirm.
            if tag.get('class') == 'link-list-heading':
                break
            for td in tag.findAll('td'):
                a = td.find('a', attrs= {'class': 'titulo_articulos'})
                if a is None:
                    continue
                title = self.tag_to_string(a)
                # Separate name so the ``url`` parameter is not overwritten.
                article_url = a.get('href', False)
                if not article_url or not title:
                    continue
                if article_url.startswith('/'):
                    article_url = 'http://freeway.com.uy'+article_url
                p = td.find('p', attrs= {'class': 'txt_articulos'})
                # A cell may carry a title without a teaser paragraph.
                description = self.tag_to_string(p) if p is not None else ''
                self.log('\t\tFound article:', title)
                self.log('\t\t\t', article_url)
                self.log('\t\t\t', description)
                current_articles.append({'title': title, 'url': article_url, 'description':description, 'date':''})

        return current_articles

    def preprocess_html(self, soup):
        """Turn all table-related tags into ``div`` and drop layout attributes."""
        attribs = [ 'style','font','valign'
                   ,'colspan','width','height'
                   ,'rowspan','summary','align'
                   ,'cellspacing','cellpadding'
                   ,'frames','rules','border'
                  ]
        for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
            item.name = 'div'
            for attrib in attribs:
                if item.has_key(attrib):
                    del item[attrib]
        return soup

    def get_cover_url(self):
        """Return the cover image URL (a fixed, hard-coded image)."""
        return 'http://freeway.com.uy/_upload/_n_foto_grande/noticia_1792_tapanoviembre2010.jpg'
|
@ -1,5 +1,5 @@
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = u'Marc T\xf6nsing'
|
||||
__author__ = u'Marc Toensing'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
@ -17,6 +17,7 @@ class GamespotCom(BasicNewsRecipe):
|
||||
no_javascript = True
|
||||
|
||||
feeds = [
|
||||
('All Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5'),
|
||||
('PC Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=5'),
|
||||
('XBOX 360 Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1029'),
|
||||
('Wii Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1031'),
|
||||
@ -37,5 +38,3 @@ class GamespotCom(BasicNewsRecipe):
|
||||
|
||||
def get_article_url(self, article):
|
||||
return article.get('link') + '?print=1'
|
||||
|
||||
|
||||
|
28
resources/recipes/german_gov.recipe
Normal file
@ -0,0 +1,28 @@
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class GermanGovermentPress(BasicNewsRecipe):
    """Recipe for the German federal government's press releases (RSS)."""

    title = u'Pressemitteilungen der Bundesregierung'
    oldest_article = 14
    __author__ = 'malfi'
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://www.bundesregierung.de/static/images/logoBR.gif'
    language = 'de'
    keep_only_tags = []
    keep_only_tags.append(dict(name = 'h2'))
    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'textblack'}))
    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'subtitle'}))
    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'text'}))
    remove_tags = []
    feeds = [ (u'Pressemitteilungen',u'http://www.bundesregierung.de/Webs/Breg/DE/Service/RSS/Functions/bundesregierungPressemitteilungenRSS20,templateId=renderNewsfeed.rdf') ]
    extra_css = (
        '\n'
        'h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}\n'
        'h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}\n'
        'p{font-family:Arial,Helvetica,sans-serif;font-size:small;}\n'
        'body{font-family:Helvetica,Arial,sans-serif;font-size:small;}\n'
    )

    def print_version(self, url):
        """Return the print-layout URL for an article.

        ``<base>.html`` becomes ``<base>,layoutVariant=Druckansicht.html``.
        A URL that does not end in ``.html`` is returned unchanged instead
        of raising on the failed match.
        """
        # The dot is escaped so only a literal ".html" suffix qualifies.
        m = re.search(r'^(.*)\.html$', url)
        if m is None:
            return url
        return str(m.group(1)) + ',layoutVariant=Druckansicht.html'
|
@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
|
||||
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||
__copyright__ = '2010, Szing'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
'''
|
||||
@ -10,49 +10,52 @@ globeandmail.com
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class GlobeAndMail(BasicNewsRecipe):
|
||||
title = u'Globe and Mail'
|
||||
language = 'en_CA'
|
||||
|
||||
__author__ = 'Kovid Goyal'
|
||||
class AdvancedUserRecipe1287083651(BasicNewsRecipe):
|
||||
title = u'Globe & Mail'
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'Szing'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 10
|
||||
no_stylesheets = True
|
||||
extra_css = '''
|
||||
h3 {font-size: 22pt; font-weight:bold; margin:0px; padding:0px 0px 8pt 0px;}
|
||||
h4 {margin-top: 0px;}
|
||||
#byline { font-family: monospace; font-weight:bold; }
|
||||
#placeline {font-weight:bold;}
|
||||
#credit {margin-top:0px;}
|
||||
.tag {font-size: 22pt;}'''
|
||||
description = 'Canada\'s national newspaper'
|
||||
keep_only_tags = [dict(name='article')]
|
||||
remove_tags = [dict(name='aside'),
|
||||
dict(name='footer'),
|
||||
dict(name='div', attrs={'class':(lambda x: isinstance(x, (str,unicode)) and 'articlecommentcountholder' in x.split(' '))}),
|
||||
dict(name='ul', attrs={'class':(lambda x: isinstance(x, (str,unicode)) and 'articletoolbar' in x.split(' '))}),
|
||||
]
|
||||
feeds = [
|
||||
(u'Latest headlines', u'http://www.theglobeandmail.com/?service=rss'),
|
||||
(u'Top stories', u'http://www.theglobeandmail.com/?service=rss&feed=topstories'),
|
||||
(u'National', u'http://www.theglobeandmail.com/news/national/?service=rss'),
|
||||
(u'Politics', u'http://www.theglobeandmail.com/news/politics/?service=rss'),
|
||||
(u'World', u'http://www.theglobeandmail.com/news/world/?service=rss'),
|
||||
(u'Business', u'http://www.theglobeandmail.com/report-on-business/?service=rss'),
|
||||
(u'Opinions', u'http://www.theglobeandmail.com/news/opinions/?service=rss'),
|
||||
(u'Columnists', u'http://www.theglobeandmail.com/news/opinions/columnists/?service=rss'),
|
||||
(u'Globe Investor', u'http://www.theglobeandmail.com/globe-investor/?service=rss'),
|
||||
(u'Sports', u'http://www.theglobeandmail.com/sports/?service=rss'),
|
||||
(u'Technology', u'http://www.theglobeandmail.com/news/technology/?service=rss'),
|
||||
(u'Arts', u'http://www.theglobeandmail.com/news/arts/?service=rss'),
|
||||
(u'Life', u'http://www.theglobeandmail.com/life/?service=rss'),
|
||||
(u'Blogs', u'http://www.theglobeandmail.com/blogs/?service=rss'),
|
||||
(u'Real Estate', u'http://www.theglobeandmail.com/real-estate/?service=rss'),
|
||||
(u'Auto', u'http://www.theglobeandmail.com/auto/?service=rss')
|
||||
]
|
||||
max_articles_per_feed = 100
|
||||
encoding = 'utf8'
|
||||
publisher = 'Globe & Mail'
|
||||
language = 'en_CA'
|
||||
extra_css = 'p.meta {font-size:75%}\n .redtext {color: red;}\n .byline {font-size: 70%}'
|
||||
|
||||
def get_article_url(self, article):
|
||||
url = BasicNewsRecipe.get_article_url(self, article)
|
||||
if '/video/' not in url:
|
||||
return url
|
||||
feeds = [
|
||||
(u'Top National Stories', u'http://www.theglobeandmail.com/news/national/?service=rss'),
|
||||
(u'Business', u'http://www.theglobeandmail.com/report-on-business/?service=rss'),
|
||||
(u'Commentary', u'http://www.theglobeandmail.com/report-on-business/commentary/?service=rss'),
|
||||
(u'Blogs', u'http://www.theglobeandmail.com/blogs/?service=rss'),
|
||||
(u'Facts & Arguments', u'http://www.theglobeandmail.com/life/facts-and-arguments/?service=rss'),
|
||||
(u'Technology', u'http://www.theglobeandmail.com/news/technology/?service=rss'),
|
||||
(u'Investing', u'http://www.theglobeandmail.com/globe-investor/?service=rss'),
|
||||
(u'Top Polical Stories', u'http://www.theglobeandmail.com/news/politics/?service=rss'),
|
||||
(u'Arts', u'http://www.theglobeandmail.com/news/arts/?service=rss'),
|
||||
(u'Life', u'http://www.theglobeandmail.com/life/?service=rss'),
|
||||
(u'Real Estate', u'http://www.theglobeandmail.com/real-estate/?service=rss'),
|
||||
(u'Auto', u'http://www.theglobeandmail.com/sports/?service=rss'),
|
||||
(u'Sports', u'http://www.theglobeandmail.com/auto/?service=rss')
|
||||
]
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='h1'),
|
||||
dict(name='h2', attrs={'id':'articletitle'}),
|
||||
dict(name='p', attrs={'class':['leadText', 'meta', 'leadImage', 'redtext byline', 'bodyText']}),
|
||||
dict(name='div', attrs={'class':['news','articlemeta','articlecopy']}),
|
||||
dict(name='id', attrs={'class':'article'}),
|
||||
dict(name='table', attrs={'class':'todays-market'}),
|
||||
dict(name='header', attrs={'id':'leadheader'})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'id':['tabInside', 'ShareArticles', 'topStories']})
|
||||
]
|
||||
|
||||
#this has to be here or the text in the article appears twice.
|
||||
remove_tags_after = [dict(id='article')]
|
||||
|
||||
#Use the mobile version rather than the web version
|
||||
def print_version(self, url):
|
||||
return url + '&service=mobile'
|
||||
|
||||
|
47
resources/recipes/globes_co_il.recipe
Normal file
@ -0,0 +1,47 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import re
|
||||
|
||||
class AdvancedUserRecipe1283848012(BasicNewsRecipe):
    """Recipe for the Hebrew business daily Globes (globes.co.il), RSS based."""

    description = 'This is Globes.co.il.'
    cover_url = 'http://www.the7eye.org.il/SiteCollectionImages/BAKTANA/arye_avnery_010709_377.jpg'
    title = u'Globes'
    language = 'he'
    __author__ = 'marbs'
    extra_css='img {max-width:100%;} body{direction: rtl;max-width:100%;}title{direction: rtl; } article_description{direction: rtl; }, a.article{direction: rtl;max-width:100%;} calibre_feed_description{direction: rtl; }'
    simultaneous_downloads = 5
    remove_javascript = True
    timefmt = '[%a, %d %b, %Y]'
    oldest_article = 1
    max_articles_per_feed = 100
    remove_attributes = ['width','style']

    feeds = [(u'שוק ההון', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=585'),
             (u'נדל"ן', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=607'),
             (u'וול סטריט ושווקי העולם', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=1225'),
             (u'ניתוח טכני', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=1294'),
             (u'היי טק', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=594'),
             (u'נתח שוק וצרכנות', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=821'),
             (u'דין וחשבון', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=829'),
             (u'רכב', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3220'),
             (u'דעות', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=845'),
             (u'קניון המניות - טור שבועי', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3175'),
             (u'סביבה', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3221')]

    def print_version(self, url):
        """Return the print-window URL for an article.

        The text between the first and second ``=`` of ``url`` is used as
        the ``did`` parameter.  A URL without any ``=`` is returned
        unchanged instead of raising ``IndexError``.
        """
        split1 = url.split("=")
        if len(split1) < 2:
            return url
        print_url = 'http://www.globes.co.il/serve/globes/printwindow.asp?did=' + split1[1]
        return print_url

    def preprocess_html(self, soup):
        """Remove the first black table row and the row preceding it.

        Both lookups are guarded so a page without such a row is returned
        untouched rather than raising ``AttributeError``.
        """
        black_row = soup.find('tr',attrs={'bgcolor':'black'})
        if black_row is not None:
            previous_row = black_row.findPrevious('tr')
            if previous_row is not None:
                previous_row.extract()
        # Looked up again, as the original did: the previous removal may
        # have taken the first match out of the tree along with it.
        black_row = soup.find('tr',attrs={'bgcolor':'black'})
        if black_row is not None:
            black_row.extract()
        return soup

    def fixChars(self,string):
        """Apply a single character substitution to ``string`` and return it."""
        # Replace lsquo (\x91)
        # NOTE(review): pattern and replacement are the same character as
        # rendered here, so this looks like a no-op -- the literals may have
        # been garbled in transit; confirm against the upstream recipe.
        fixed = re.sub("■","■",string)
        return fixed
|
||||
|
||||
|
41
resources/recipes/handelsblatt.recipe
Normal file
@ -0,0 +1,41 @@
|
||||
import re
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Handelsblatt(BasicNewsRecipe):
    """Recipe for the German business daily Handelsblatt (RSS, print view)."""

    title = u'Handelsblatt'
    __author__ = 'malfi'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://www.handelsblatt.com/images/logo/logo_handelsblatt.com.png'
    language = 'de'
    keep_only_tags = []
    keep_only_tags.append(dict(name = 'div', attrs = {'class': 'structOneCol'}))
    keep_only_tags.append(dict(name = 'div', attrs = {'id': 'fullText'}))
    remove_tags = [dict(name='img', attrs = {'src': 'http://www.handelsblatt.com/images/icon/loading.gif'})]

    feeds = [
        (u'Handelsblatt Exklusiv',u'http://www.handelsblatt.com/rss/exklusiv'),
        (u'Handelsblatt Top-Themen',u'http://www.handelsblatt.com/rss/top-themen'),
        (u'Handelsblatt Schlagzeilen',u'http://www.handelsblatt.com/rss/ticker/'),
        (u'Handelsblatt Finanzen',u'http://www.handelsblatt.com/rss/finanzen/'),
        (u'Handelsblatt Unternehmen',u'http://www.handelsblatt.com/rss/unternehmen/'),
        (u'Handelsblatt Politik',u'http://www.handelsblatt.com/rss/politik/'),
        (u'Handelsblatt Technologie',u'http://www.handelsblatt.com/rss/technologie/'),
        (u'Handelsblatt Meinung',u'http://www.handelsblatt.com/rss/meinung'),
        (u'Handelsblatt Magazin',u'http://www.handelsblatt.com/rss/magazin/'),
        (u'Handelsblatt Weblogs',u'http://www.handelsblatt.com/rss/blogs')
    ]
    extra_css = (
        '\n'
        'h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}\n'
        'h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}\n'
        'p{font-family:Arial,Helvetica,sans-serif;font-size:small;}\n'
        'body{font-family:Helvetica,Arial,sans-serif;font-size:small;}\n'
    )

    def print_version(self, url):
        """Return the print-page URL built from the numeric id in ``url``.

        The id is the first run of digits that directly follows a ``;``.
        When no such id exists the original URL is returned, instead of
        raising on a failed match or building a URL with an empty id.
        """
        # '+' (not '*') so a ';' followed by a non-digit cannot yield ''.
        m = re.search('(?<=;)[0-9]+', url)
        if m is None:
            return url
        return u'http://www.handelsblatt.com/_b=' + str(m.group(0)) + ',_p=21,_t=ftprint,doc_page=0;printpage'
|
||||
|
||||
|
38
resources/recipes/hannoversche_zeitung.recipe
Normal file
@ -0,0 +1,38 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1287519083(BasicNewsRecipe):
    """Recipe for the Hannoversche Allgemeine Zeitung (haz.de), RSS based."""

    title = u'Hannoversche Allgemeine Zeitung'
    oldest_article = 1
    __author__ = 'Artemis'
    max_articles_per_feed = 30
    language = 'de'
    no_stylesheets = True

    feeds = [
        #(u'Schlagzeilen', u'http://www.haz.de/rss/feed/haz_schlagzeilen'),
        (u'Politik', u'http://www.haz.de/rss/feed/haz_politik'),
        (u'Wirtschaft', u'http://www.haz.de/rss/feed/haz_wirtschaft'),
        (u'Panorama', u'http://www.haz.de/rss/feed/haz_panorama'),
        (u'Wissen', u'http://www.haz.de/rss/feed/haz_wissen'),
        (u'Kultur', u'http://www.haz.de/rss/feed/haz_kultur'),
        (u'Sp\xe4tvorstellung', u'http://www.haz.de/rss/feed/haz_spaetvorstellung'),
        (u'Hannover & Region', u'http://www.haz.de/rss/feed/haz_hannoverregion'),
        (u'Netzgefl\xfcster', u'http://www.haz.de/rss/feed/haz_netzgefluester'),
        (u'Meinung', u'http://www.haz.de/rss/feed/haz_meinung'),
        (u'ZiSH', u'http://www.haz.de/rss/feed/haz_zish'),
        (u'Medien', u'http://www.haz.de/rss/feed/haz_medien'),
        #(u'Sport', u'http://www.haz.de/rss/feed/haz_sport'),
        #(u'Hannover 96', u'http://www.haz.de/rss/feed/haz_hannover96')
    ]

    remove_tags_before =dict(id='modul_artikel')
    remove_tags_after =dict(id='articlecontent')

    # remove_tags was previously assigned twice, so the sidebar rule was
    # overwritten by the list below and never applied.  Both rules now live
    # in one list.
    remove_tags = [
        dict(id='articlesidebar'),
        dict(name='div', attrs={'class':['articlecomment',
            'articlebookmark', 'teaser_anzeige', 'teaser_umfrage',
            'navigation', 'subnavigation']})
    ]
|
||||
|
38
resources/recipes/hola.recipe
Normal file
@ -0,0 +1,38 @@
|
||||
#!/usr/bin/env python
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Brendan Sleight <bms.calibre at barwap.com>'
|
||||
'''
|
||||
hola.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Hackaday(BasicNewsRecipe):
    '''Recipe for the hola.com RSS feeds.'''
    # NOTE(review): the class is called Hackaday although it fetches hola.com;
    # looks like a leftover from a copied recipe - confirm before renaming.

    title                 = u'Hola'
    __author__            = 'bmsleight'
    description           = 'diario de actualidad, moda y belleza.'
    language              = 'es'

    oldest_article        = 10
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False

    # Only the <div id="cuerpo"> element of each page is kept.
    keep_only_tags = [dict(name='div', attrs={'id':'cuerpo'})]

    feeds = [
        (u'Famosos', u'http://www.hola.com/famosos/rss.xml'),
        (u'Realeza', u'http://www.hola.com/realeza/rss.xml'),
        (u'Cine', u'http://www.hola.com/cine/rss.xml'),
        (u'Música', u'http://www.hola.com/musica/rss.xml'),
        (u'Moda y modelos', u'http://www.hola.com/moda/portada/rss.xml'),
        (u'Belleza y salud', u'http://www.hola.com/belleza/portada/rss.xml'),
        (u'Niños', u'http://www.hola.com/ninos/rss.xml'),
        (u'Todas las noticias', u'http://int2.hola.com/app/feeds/rss_hola.php'),
    ]

    def get_article_url(self, article):
        '''Return the feed entry's ``guid`` as the article URL (None if absent).'''
        return article.get('guid', None)
|
@ -13,7 +13,6 @@ class IrishTimes(BasicNewsRecipe):
|
||||
language = 'en_IE'
|
||||
timefmt = ' (%A, %B %d, %Y)'
|
||||
|
||||
|
||||
oldest_article = 3
|
||||
no_stylesheets = True
|
||||
simultaneous_downloads= 1
|
||||
@ -33,13 +32,13 @@ class IrishTimes(BasicNewsRecipe):
|
||||
('Letters', 'http://www.irishtimes.com/feeds/rss/newspaper/letters.rss'),
|
||||
]
|
||||
|
||||
|
||||
def print_version(self, url):
|
||||
if url.count('rss.feedsportal.com'):
|
||||
u = url.replace('0Bhtml/story01.htm','_pf0Bhtml/story01.htm')
|
||||
else:
|
||||
u = url.replace('.html','_pf.html')
|
||||
return u
|
||||
if url.count('rss.feedsportal.com'):
|
||||
u = 'http://www.irishtimes.com' + \
|
||||
(((url[69:].replace('0C','/')).replace('0A','0'))).replace('0Bhtml/story01.htm','_pf.html')
|
||||
else:
|
||||
u = url.replace('.html','_pf.html')
|
||||
return u
|
||||
|
||||
def get_article_url(self, article):
|
||||
return article.link
|
||||
|
@ -6,6 +6,8 @@ __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||
www.jiji.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class JijiDotCom(BasicNewsRecipe):
|
||||
title = u'\u6642\u4e8b\u901a\u4fe1'
|
||||
__author__ = 'Hiroshi Miura'
|
||||
|
48
resources/recipes/la_diaria.recipe
Normal file
@ -0,0 +1,48 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
|
||||
'''
|
||||
ladiaria.com.uy
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class General(BasicNewsRecipe):
    '''Recipe for the "Articulos" feed of ladiaria.com.'''

    title       = 'La Diaria'
    __author__  = 'Gustavo Azambuja'
    description = 'Noticias de Uruguay'
    language    = 'es'
    timefmt     = '[%a, %d %b, %Y]'

    use_embedded_content = False
    # NOTE(review): other recipes in this change set spell this option
    # 'recursions'; 'recursion' may therefore have no effect - confirm.
    recursion         = 5
    encoding          = 'utf8'
    remove_javascript = True
    no_stylesheets    = True

    oldest_article        = 2
    max_articles_per_feed = 100

    keep_only_tags = [dict(id=['article'])]
    remove_tags = [
        dict(name='div', attrs={'class':['byline', 'hr', 'titlebar', 'volver-arriba-right']}),
        dict(name='div', attrs={'id':'discussion'}),
        dict(name=['object','link'])
    ]

    extra_css = '''
        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
        p {font-family:Arial,Helvetica,sans-serif;}
    '''

    feeds = [(u'Articulos', u'http://ladiaria.com/feeds/articulos')]

    def get_cover_url(self):
        '''Return the fixed URL used as the cover image.'''
        return 'http://ladiaria.com/edicion/imagenportada/'

    def preprocess_html(self, soup):
        '''Drop the inline ``style`` attribute from every tag that has one.'''
        styled_tags = soup.findAll(style=True)
        for tag in styled_tags:
            del tag['style']
        return soup
|
@ -54,10 +54,7 @@ class LaJornada_mx(BasicNewsRecipe):
|
||||
preprocess_regexps = [
|
||||
(re.compile( r'<div class="inicial">(.*)</div><p class="s-s">'
|
||||
,re.DOTALL|re.IGNORECASE)
|
||||
,lambda match: '<p class="inicial">' + match.group(1) + '</p><p class="s-s">'),
|
||||
(re.compile( r'<q>(.*?)</q>'
|
||||
,re.DOTALL|re.IGNORECASE)
|
||||
,lambda match: '"' + match.group(1) + '"')
|
||||
,lambda match: '<p class="inicial">' + match.group(1) + '</p><p class="s-s">')
|
||||
]
|
||||
|
||||
keep_only_tags = [
|
||||
|
@ -8,7 +8,7 @@ from calibre import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class LaRazon_Bol(BasicNewsRecipe):
|
||||
title = 'La Razón - Bolivia'
|
||||
title = u'La Razón - Bolivia'
|
||||
__author__ = 'Darko Miletic'
|
||||
description = 'El diario nacional de Bolivia'
|
||||
publisher = 'Praxsis S.R.L.'
|
||||
|
@ -6,6 +6,8 @@ __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||
www.mainichi.jp
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class MainichiDailyNews(BasicNewsRecipe):
|
||||
title = u'\u6bce\u65e5\u65b0\u805e'
|
||||
__author__ = 'Hiroshi Miura'
|
||||
|
@ -1,3 +1,5 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class MainichiDailyITNews(BasicNewsRecipe):
|
||||
title = u'\u6bce\u65e5\u65b0\u805e(IT&\u5bb6\u96fb)'
|
||||
__author__ = 'Hiroshi Miura'
|
||||
|
35
resources/recipes/marctv.recipe
Normal file
@ -0,0 +1,35 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
'''
|
||||
Fetch MarcTV.
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class MarcTVde(BasicNewsRecipe):
    '''Recipe for the three marctv feedburner feeds (Spiele, Leben, Medien).'''

    title       = 'Marc Toensings Visionen'
    description = 'Marc Toensings Visionen'
    language    = 'de'
    __author__  = 'Marc Toensing'

    max_articles_per_feed = 40
    oldest_article        = 665
    use_embedded_content  = False

    remove_tags = []

    # keep_only_tags is a list of tag specs; the original assigned a bare
    # dict, which only works if the consumer special-cases dicts.
    keep_only_tags = [dict(name='div', attrs={'class':["content"]})]

    feeds = [
        (u'Spiele', u'http://feeds.feedburner.com/marctv/spiele'),
        (u'Leben', u'http://feeds.feedburner.com/marctv/leben'),
        (u'Medien', u'http://feeds.feedburner.com/marctv/medien'),
    ]

    # NOTE(review): '.#wrapper' is not a valid CSS selector, so this rule is
    # probably ignored; left untouched because the intended selector is unclear.
    extra_css = '.#wrapper .entry p img{width:620px; height: 270px;}'

    def get_cover_url(self):
        '''Return the fixed URL of the cover image.'''
        return 'http://marctv.de/marctv.png'
|
@ -3,13 +3,28 @@ __copyright__ = '2010, Eddie Lau'
|
||||
'''
|
||||
modified from Singtao Toronto calibre recipe by rty
|
||||
Change Log:
|
||||
2010/11/22: add English section, remove eco-news section which is not updated daily, correct
|
||||
ordering of articles
|
||||
2010/11/12: add news image and eco-news section
|
||||
2010/11/08: add parsing of finance section
|
||||
2010/11/06: temporary work-around for Kindle device having no capability to display unicode
|
||||
in section/article list.
|
||||
2010/10/31: skip repeated articles in section pages
|
||||
'''
|
||||
|
||||
import datetime
|
||||
import os, datetime, re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from contextlib import nested
|
||||
|
||||
class AdvancedUserRecipe1278063072(BasicNewsRecipe):
|
||||
|
||||
from calibre import __appname__, strftime
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||
from calibre.ebooks.metadata.toc import TOC
|
||||
from calibre.ebooks.metadata import MetaInformation
|
||||
from calibre.utils.date import now as nowf
|
||||
|
||||
class MPHKRecipe(BasicNewsRecipe):
|
||||
title = 'Ming Pao - Hong Kong'
|
||||
oldest_article = 1
|
||||
max_articles_per_feed = 100
|
||||
@ -24,27 +39,131 @@ class AdvancedUserRecipe1278063072(BasicNewsRecipe):
|
||||
encoding = 'Big5-HKSCS'
|
||||
recursions = 0
|
||||
conversion_options = {'linearize_tables':True}
|
||||
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;}'
|
||||
#extra_css = 'img {float:right; margin:4px;}'
|
||||
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
|
||||
keep_only_tags = [dict(name='h1'),
|
||||
#dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page
|
||||
dict(attrs={'class':['photo']}),
|
||||
dict(attrs={'id':['newscontent']}),
|
||||
dict(attrs={'id':['newscontent01','newscontent02']})]
|
||||
remove_tags = [dict(name='style'),
|
||||
dict(attrs={'id':['newscontent135']})] # for the finance page
|
||||
remove_attributes = ['width']
|
||||
preprocess_regexps = [
|
||||
(re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
|
||||
lambda match: '<h1>'),
|
||||
(re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
|
||||
lambda match: '</h1>'),
|
||||
]
|
||||
|
||||
def image_url_processor(cls, baseurl, url):
    '''
    Return the image ``url`` unchanged; ``baseurl`` is not used.

    An earlier experiment tried to cut the URL at its first digit and put an
    extra '_' in front of it. That experiment did not work (its author noted
    it may need to move to preprocess_html()) and is disabled, so this hook
    is currently a pass-through.
    '''
    return url
|
||||
|
||||
def get_fetchdate(self):
|
||||
dt_utc = datetime.datetime.utcnow()
|
||||
# convert UTC to local hk time - at around HKT 5.30am, all news are available
|
||||
dt_local = dt_utc - datetime.timedelta(-2.5/24)
|
||||
# convert UTC to local hk time - at around HKT 6.00am, all news are available
|
||||
dt_local = dt_utc - datetime.timedelta(-2.0/24)
|
||||
return dt_local.strftime("%Y%m%d")
|
||||
|
||||
def parse_index(self):
|
||||
feeds = []
|
||||
dateStr = self.get_fetchdate()
|
||||
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'), (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'), (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'), (u'\u793e\u8a55\u2027\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm'), (u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'), (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'), (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm'), ('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'), (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm'), (u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),]:
|
||||
articles = self.parse_section(url)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
feeds = []
|
||||
dateStr = self.get_fetchdate()
|
||||
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
|
||||
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
|
||||
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
|
||||
(u'\u793e\u8a55\u2027\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm'),
|
||||
(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
|
||||
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
|
||||
(u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm'),
|
||||
('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
|
||||
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm'),
|
||||
(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
|
||||
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
|
||||
articles = self.parse_section(url)
|
||||
if articles:
|
||||
feeds.append((title, articles))
|
||||
# special - finance
|
||||
fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
|
||||
if fin_articles:
|
||||
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
|
||||
# special - eco-friendly
|
||||
# eco_articles = self.parse_eco_section('http://tssl.mingpao.com/htm/marketing/eco/cfm/Eco1.cfm')
|
||||
# if eco_articles:
|
||||
# feeds.append((u'\u74b0\u4fdd Eco News', eco_articles))
|
||||
# special - entertainment
|
||||
#ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
|
||||
#if ent_articles:
|
||||
# feeds.append(('Entertainment', ent_articles))
|
||||
return feeds
|
||||
|
||||
def parse_section(self, url):
    '''
    Collect the articles linked from one section index page.

    :param url: URL of the section index page.
    :return: list of dicts with the keys 'title', 'url', 'description' and
             'date', in page order. When a link is repeated, only its last
             occurrence on the page is kept. Links whose URL contains
             'Redirect' are skipped.
    '''
    dateStr = self.get_fetchdate()
    soup = self.index_to_soup(url)
    divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
    current_articles = []
    included_urls = set()
    # Walk the page bottom-up so the last duplicate is the one that is kept;
    # the result is flipped back into page order at the end.
    for i in reversed(divs):
        a = i.find('a', href = True)
        if a is None:
            # Bullet without a link: nothing to download.
            continue
        title = self.tag_to_string(a)
        url = a.get('href', False)
        url = 'http://news.mingpao.com/' + dateStr + '/' +url
        if url not in included_urls and 'Redirect' not in url:
            current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
            included_urls.add(url)
    current_articles.reverse()
    return current_articles
|
||||
|
||||
def parse_fin_section(self, url):
    '''
    Collect the finance articles linked from the index page at ``url``.

    A link is kept when its href contains the fetch date and does not
    contain 'index'. Returns a list of dicts with the keys 'title', 'url'
    and 'description'.
    '''
    fetch_date = self.get_fetchdate()
    index_page = self.index_to_soup(url)
    articles = []
    for link in index_page.findAll('a', href= True):
        href = link.get('href', False)
        if fetch_date not in href or 'index' in href:
            continue
        articles.append({
            'title': self.tag_to_string(link),
            'url': 'http://www.mpfinance.com/cfm/' + href,
            'description': '',
        })
    return articles
|
||||
|
||||
def parse_eco_section(self, url):
|
||||
soup = self.index_to_soup(url)
|
||||
divs = soup.findAll(attrs={'class': ['bullet']})
|
||||
current_articles = []
|
||||
@ -53,9 +172,162 @@ class AdvancedUserRecipe1278063072(BasicNewsRecipe):
|
||||
a = i.find('a', href = True)
|
||||
title = self.tag_to_string(a)
|
||||
url = a.get('href', False)
|
||||
url = 'http://news.mingpao.com/' + dateStr + '/' +url
|
||||
if url not in included_urls:
|
||||
url = 'http://tssl.mingpao.com/htm/marketing/eco/cfm/' +url
|
||||
if url not in included_urls and url.rfind('Redirect') == -1:
|
||||
current_articles.append({'title': title, 'url': url, 'description':''})
|
||||
included_urls.append(url)
|
||||
return current_articles
|
||||
|
||||
#def parse_ent_section(self, url):
|
||||
# dateStr = self.get_fetchdate()
|
||||
# soup = self.index_to_soup(url)
|
||||
# a = soup.findAll('a', href=True)
|
||||
# current_articles = []
|
||||
# included_urls = []
|
||||
# for i in a:
|
||||
# title = self.tag_to_string(i)
|
||||
# url = 'http://ol.mingpao.com/cfm/' + i.get('href', False)
|
||||
# if url not in included_urls and not url.rfind('.txt') == -1 and not url.rfind(dateStr) == -1 and not title == '':
|
||||
# current_articles.append({'title': title, 'url': url, 'description': ''})
|
||||
# return current_articles
|
||||
|
||||
def preprocess_html(self, soup):
    '''
    Strip presentational attributes from the downloaded page.

    Removes every ``style`` and ``width`` attribute, and every ``align``
    attribute whose value is 'absmiddle'. Returns the same soup object.
    '''
    for item in soup.findAll(style=True):
        del item['style']
    # The original looped over style=True a second time while deleting
    # 'width', so it never matched anything; select on the attribute itself.
    for item in soup.findAll(width=True):
        del item['width']
    # The original searched for a 'stype' attribute and deleted an attribute
    # named 'absmiddle'. 'absmiddle' is a value of align, so presumably the
    # intent was to drop align="absmiddle" - TODO confirm with the author.
    for item in soup.findAll(align='absmiddle'):
        del item['align']
    return soup
|
||||
|
||||
def create_opf(self, feeds, dir=None):
    '''
    Write ``index.opf`` and ``index.ncx`` for the downloaded feeds.

    :param feeds: sequence of feed objects; each is iterated for its
                  articles and read for ``title`` (``author`` and
                  ``description`` are optional).
    :param dir:   directory to write into; defaults to ``self.output_dir``.
    :raises Exception: if ``feeds`` is empty.

    NOTE(review): the indentation of this block was reconstructed from the
    statement order - verify the nesting against the real file.
    '''
    #super(MPHKRecipe,self).create_opf(feeds, dir)
    if dir is None:
        dir = self.output_dir
    # Book-level metadata.
    title = self.short_title()
    if self.output_profile.periodical_date_in_title:
        title += strftime(self.timefmt)
    mi = MetaInformation(title, [__appname__])
    mi.publisher = __appname__
    mi.author_sort = __appname__
    mi.publication_type = self.publication_type+':'+self.short_title()
    mi.timestamp = nowf()
    mi.comments = self.description
    if not isinstance(mi.comments, unicode):
        mi.comments = mi.comments.decode('utf-8', 'replace')
    mi.pubdate = nowf()
    opf_path = os.path.join(dir, 'index.opf')
    ncx_path = os.path.join(dir, 'index.ncx')
    opf = OPFCreator(dir, mi)
    # Add mastheadImage entry to <guide> section
    mp = getattr(self, 'masthead_path', None)
    if mp is not None and os.access(mp, os.R_OK):
        from calibre.ebooks.metadata.opf2 import Guide
        ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
        ref.type = 'masthead'
        ref.title = 'Masthead Image'
        opf.guide.append(ref)

    # Manifest: one directory per feed plus the two index files.
    manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
    manifest.append(os.path.join(dir, 'index.html'))
    manifest.append(os.path.join(dir, 'index.ncx'))

    # Get cover
    cpath = getattr(self, 'cover_path', None)
    if cpath is None:
        # No cover set: ask default_cover() to write one into cover.jpg.
        # NOTE(review): pf is never explicitly closed in this method.
        pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
        if self.default_cover(pf):
            cpath = pf.name
    if cpath is not None and os.access(cpath, os.R_OK):
        opf.cover = cpath
        manifest.append(cpath)

    # Get masthead
    mpath = getattr(self, 'masthead_path', None)
    if mpath is not None and os.access(mpath, os.R_OK):
        manifest.append(mpath)

    opf.create_manifest_from_files_in(manifest)
    # Give the NCX and masthead manifest items fixed ids.
    for mani in opf.manifest:
        if mani.path.endswith('.ncx'):
            mani.id = 'ncx'
        if mani.path.endswith('mastheadImage.jpg'):
            mani.id = 'masthead-image'
    entries = ['index.html']
    toc = TOC(base_path=dir)
    self.play_order_counter = 0
    self.play_order_map = {}

    def feed_index(num, parent):
        # Add every downloaded article of feed `num` to `entries` and to the
        # TOC node `parent`, then append a navigation bar to the last page
        # of each article.
        f = feeds[num]
        for j, a in enumerate(f):
            if getattr(a, 'downloaded', False):
                adir = 'feed_%d/article_%d/'%(num, j)
                auth = a.author
                if not auth:
                    auth = None
                desc = a.text_summary
                if not desc:
                    desc = None
                else:
                    desc = self.description_limiter(desc)
                entries.append('%sindex.html'%adir)
                po = self.play_order_map.get(entries[-1], None)
                if po is None:
                    self.play_order_counter += 1
                    po = self.play_order_counter
                parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
                                play_order=po, author=auth, description=desc)
                last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
                # Sub pages follow the article's index page; `last` ends up
                # pointing at the final page of the article.
                for sp in a.sub_pages:
                    prefix = os.path.commonprefix([opf_path, sp])
                    relp = sp[len(prefix):]
                    entries.append(relp.replace(os.sep, '/'))
                    last = sp

                if os.path.exists(last):
                    with open(last, 'rb') as fi:
                        src = fi.read().decode('utf-8')
                    soup = BeautifulSoup(src)
                    body = soup.find('body')
                    if body is not None:
                        # Two '..' components per 'link<N>' segment found in the path.
                        prefix = '/'.join('..'for i in range(2*len(re.findall(r'link\d+', last))))
                        templ = self.navbar.generate(True, num, j, len(f),
                                        not self.has_single_feed,
                                        a.orig_url, __appname__, prefix=prefix,
                                        center=self.center_navbar)
                        elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
                        body.insert(len(body.contents), elem)
                        with open(last, 'wb') as fi:
                            fi.write(unicode(soup).encode('utf-8'))
    if len(feeds) == 0:
        raise Exception('All feeds are empty, aborting.')

    if len(feeds) > 1:
        # Several feeds: each gets its own TOC node holding its articles.
        for i, f in enumerate(feeds):
            entries.append('feed_%d/index.html'%i)
            po = self.play_order_map.get(entries[-1], None)
            if po is None:
                self.play_order_counter += 1
                po = self.play_order_counter
            auth = getattr(f, 'author', None)
            if not auth:
                auth = None
            desc = getattr(f, 'description', None)
            if not desc:
                desc = None
            feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
                f.title, play_order=po, description=desc, author=auth))

    else:
        # Single feed: its articles hang directly off the TOC root.
        entries.append('feed_%d/index.html'%0)
        feed_index(0, toc)

    # Turn the '/'-separated relative entries into paths under `dir`.
    for i, p in enumerate(entries):
        entries[i] = os.path.join(dir, p.replace('/', os.sep))
    opf.create_spine(entries)
    opf.set_toc(toc)

    with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
        opf.render(opf_file, ncx_file)
|
||||
|
||||
|
57
resources/recipes/mmc_rtv.recipe
Normal file
@ -0,0 +1,57 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, BlonG'
|
||||
'''
|
||||
www.rtvslo.si
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class MMCRTV(BasicNewsRecipe):
    '''Recipe for the rtvslo.si RSS feeds; articles are fetched via the print view.'''

    title       = u'MMC RTV Slovenija'
    __author__  = u'BlonG'
    description = u"Prvi interaktivni multimedijski portal, MMC RTV Slovenija"
    language    = 'sl'

    oldest_article        = 3
    max_articles_per_feed = 20
    no_stylesheets        = True
    use_embedded_content  = False

    cover_url = 'https://sites.google.com/site/javno2010/home/rtv_slo_cover.jpg'

    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    def print_version(self, url):
        '''Build the print-view URL from the last "/"-separated part of ``url``.'''
        article_id = url.split("/")[-1]
        return 'http://www.rtvslo.si/index.php?c_mod=news&op=print&id=' + article_id

    keep_only_tags = [
        dict(name='div', attrs={'class':'title'}),
        dict(name='div', attrs={'id':'newsbody'}),
        dict(name='div', attrs={'id':'newsblocks'}),
    ]
    # Disabled by the author: a remove_tags entry for <div id="newsblocks">.

    feeds = [
        (u'Slovenija', u'http://www.rtvslo.si/feeds/01.xml'),
        (u'Svet', u'http://www.rtvslo.si/feeds/02.xml'),
        (u'Evropska unija', u'http://www.rtvslo.si/feeds/16.xml'),
        (u'Gospodarstvo', u'http://www.rtvslo.si/feeds/04.xml'),
        (u'\u010crna kronika', u'http://www.rtvslo.si/feeds/08.xml'),
        (u'Okolje', u'http://www.rtvslo.si/feeds/12.xml'),
        (u'Znanost in tehnologija', u'http://www.rtvslo.si/feeds/09.xml'),
        (u'Zabava', u'http://www.rtvslo.si/feeds/06.xml'),
        (u'Ture avanture', u'http://www.rtvslo.si/feeds/28.xml'),
    ]

    # Disabled by the author: a preprocess_html() that located the
    # 'newsblocks' div and inserted it into the 'newsbody' div.
|
||||
|
56
resources/recipes/montevideo_com.recipe
Normal file
@ -0,0 +1,56 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
|
||||
'''
|
||||
http://www.montevideo.com.uy
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Noticias(BasicNewsRecipe):
    '''Recipe for the montevideo.com.uy feeds listed below.'''

    title       = 'Montevideo COMM'
    __author__  = 'Gustavo Azambuja'
    description = 'Noticias de Uruguay'
    language    = 'es'
    timefmt     = '[%a, %d %b, %Y]'

    use_embedded_content = False
    # NOTE(review): other recipes in this change set spell this option
    # 'recursions'; 'recursion' may therefore have no effect - confirm.
    recursion         = 5
    encoding          = 'utf-8'
    remove_javascript = True
    no_stylesheets    = True

    oldest_article        = 2
    max_articles_per_feed = 100

    keep_only_tags    = [dict(id=['txt'])]
    remove_tags       = [dict(name=['object','link'])]
    remove_attributes = ['width','height', 'style', 'font', 'color']

    extra_css = '''
        h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
        h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
        h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
        p {font-family:Arial,Helvetica,sans-serif;}
    '''

    # Commented-out entries are feeds the author chose to leave disabled.
    feeds = [
        (u'Destacados', u'http://www.montevideo.com.uy/anxml.aspx?58'),
        (u'Noticias', u'http://www.montevideo.com.uy/anxml.aspx?59'),
        (u'Tecnologia', u'http://www.montevideo.com.uy/anxml.aspx?133'),
        (u'Tiempo Libre', u'http://www.montevideo.com.uy/anxml.aspx?60'),
        # (u'Deportes', u'http://www.montevideo.com.uy/anxml.aspx?968'),
        # (u'Pantallazo', u'http://www.montevideo.com.uy/anxml.aspx?1022'),
        (u'Gastronomia', u'http://www.montevideo.com.uy/anxml.aspx?1023')
    ]

    def get_cover_url(self):
        '''Return the fixed URL of the cover image.'''
        return 'http://sphotos.ak.fbcdn.net/hphotos-ak-snc1/hs276.snc1/10319_147339559330_147337559330_2625816_6636564_n.jpg'

    def preprocess_html(self, soup):
        '''Drop the inline ``style`` attribute from every tag that has one.'''
        styled_tags = soup.findAll(style=True)
        for tag in styled_tags:
            del tag['style']
        return soup
|
||||
|
@ -1,31 +1,33 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
moscowtimes.ru
|
||||
www.themoscowtimes.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Moscowtimes(BasicNewsRecipe):
|
||||
title = u'The Moscow Times'
|
||||
title = 'The Moscow Times'
|
||||
__author__ = 'Darko Miletic and Sujata Raman'
|
||||
description = 'News from Russia'
|
||||
language = 'en'
|
||||
lang = 'en'
|
||||
oldest_article = 7
|
||||
description = 'The Moscow Times is a daily English-language newspaper featuring objective, reliable news on business, politics, sports and culture in Moscow, in Russia and the former Soviet Union (CIS).'
|
||||
category = 'Russia, Moscow, Russian news, Moscow news, Russian newspaper, daily news, independent news, reliable news, USSR, Soviet Union, CIS, Russian politics, Russian business, Russian culture, Russian opinion, St Petersburg, Saint Petersburg'
|
||||
publisher = 'The Moscow Times'
|
||||
language = 'en'
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
#encoding = 'utf-8'
|
||||
encoding = 'cp1252'
|
||||
remove_javascript = True
|
||||
remove_empty_feeds = True
|
||||
encoding = 'cp1251'
|
||||
masthead_url = 'http://www.themoscowtimes.com/bitrix/templates/tmt/img/logo.gif'
|
||||
publication_type = 'newspaper'
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'language' : lang
|
||||
}
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
extra_css = '''
|
||||
h1{ color:#0066B3; font-family: Georgia,serif ; font-size: large}
|
||||
@ -35,39 +37,37 @@ class Moscowtimes(BasicNewsRecipe):
|
||||
.text{font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size:75%; }
|
||||
'''
|
||||
feeds = [
|
||||
(u'The Moscow Times Top Stories' , u'http://www.themoscowtimes.com/rss/top'),
|
||||
(u'The Moscow Times Current Issue' , u'http://www.themoscowtimes.com/rss/issue'),
|
||||
(u'The Moscow Times News' , u'http://www.themoscowtimes.com/rss/news'),
|
||||
(u'The Moscow Times Business' , u'http://www.themoscowtimes.com/rss/business'),
|
||||
(u'The Moscow Times Art and Ideas' , u'http://www.themoscowtimes.com/rss/art'),
|
||||
(u'The Moscow Times Opinion' , u'http://www.themoscowtimes.com/rss/opinion')
|
||||
(u'Top Stories' , u'http://www.themoscowtimes.com/rss/top' )
|
||||
,(u'Current Issue' , u'http://www.themoscowtimes.com/rss/issue' )
|
||||
,(u'News' , u'http://www.themoscowtimes.com/rss/news' )
|
||||
,(u'Business' , u'http://www.themoscowtimes.com/rss/business')
|
||||
,(u'Art and Ideas' , u'http://www.themoscowtimes.com/rss/art' )
|
||||
,(u'Opinion' , u'http://www.themoscowtimes.com/rss/opinion' )
|
||||
]
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':['newstextblock']})
|
||||
]
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'id':'content'})]
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':['photo_nav']})
|
||||
]
|
||||
|
||||
dict(name='div', attrs={'class':['photo_nav','phototext']})
|
||||
,dict(name=['iframe','meta','base','link','embed','object'])
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['xml:lang'] = self.lang
|
||||
soup.html['lang'] = self.lang
|
||||
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
|
||||
soup.head.insert(0,mtag)
|
||||
|
||||
return self.adeify_images(soup)
|
||||
for lnk in soup.findAll('a'):
|
||||
if lnk.string is not None:
|
||||
ind = self.tag_to_string(lnk)
|
||||
lnk.replaceWith(ind)
|
||||
return soup
|
||||
|
||||
def print_version(self, url):
|
||||
return url.replace('.themoscowtimes.com/','.themoscowtimes.com/print/')
|
||||
|
||||
def get_cover_url(self):
|
||||
|
||||
cover_url = None
|
||||
href = 'http://www.themoscowtimes.com/pdf/'
|
||||
|
||||
soup = self.index_to_soup(href)
|
||||
soup = self.index_to_soup(href)
|
||||
div = soup.find('div',attrs={'class':'left'})
|
||||
a = div.find('a')
|
||||
print a
|
||||
if a :
|
||||
cover_url = a.img['src']
|
||||
if div:
|
||||
a = div.find('a')
|
||||
if a :
|
||||
cover_url = 'http://www.themoscowtimes.com' + a.img['src']
|
||||
return cover_url
|
||||
|
@ -6,6 +6,8 @@ __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||
sankei.jp.msn.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class MSNSankeiNewsProduct(BasicNewsRecipe):
|
||||
title = u'MSN\u7523\u7d4c\u30cb\u30e5\u30fc\u30b9(\u65b0\u5546\u54c1)'
|
||||
__author__ = 'Hiroshi Miura'
|
||||
|
68
resources/recipes/newsweek_polska.recipe
Normal file
@ -0,0 +1,68 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Mateusz Kielar, matek09@gmail.com'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Newsweek(BasicNewsRecipe):
    '''Recipe that downloads an issue of Newsweek Polska from newsweek.pl.'''

    # Identifier of the issue to download; set by find_last_full_issue().
    EDITION = 0

    title = u'Newsweek Polska'
    __author__ = 'Mateusz Kielar'
    description = 'Weekly magazine'
    encoding = 'utf-8'
    no_stylesheets = True
    # The magazine is published in Polish; the original value was 'en'.
    language = 'pl'
    remove_javascript = True

    keep_only_tags = [dict(name = 'div', attrs = {'class' : 'article'})]

    remove_tags = [
        dict(name = 'div', attrs = {'class' : 'copy'}),
        dict(name = 'div', attrs = {'class' : 'url'}),
    ]

    extra_css = '''
        .body {font-size: small}
        .author {font-size: x-small}
        .lead {font-size: x-small}
        .title{font-size: x-large; font-weight: bold}
    '''

    def print_version(self, url):
        '''Map an article URL of the current issue to its print page.'''
        return url.replace("http://www.newsweek.pl/artykuly/wydanie/" + str(self.EDITION), "http://www.newsweek.pl/artykuly") + '/print'

    def find_last_full_issue(self):
        '''
        Store the issue identifier in ``self.EDITION``.

        Starts at the issue-cover frame and follows, twice, the link inside
        the first attribute-less <span>; the identifier is then read from
        the link with target="_parent" on the page reached.
        '''
        page = self.index_to_soup('http://www.newsweek.pl/Frames/IssueCover.aspx')
        issue = 'http://www.newsweek.pl/Frames/' + page.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href']
        page = self.index_to_soup(issue)
        issue = 'http://www.newsweek.pl/Frames/' + page.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href']
        page = self.index_to_soup(issue)
        self.EDITION = page.find('a', attrs={'target' : '_parent'})['href'].replace('/wydania/','')

    def parse_index(self):
        '''Return ``[(section title, [article dict, ...]), ...]`` for the issue.'''
        self.find_last_full_issue()
        soup = self.index_to_soup('http://www.newsweek.pl/wydania/' + str(self.EDITION))
        img = soup.find('img', id="ctl00_C1_PaperIsssueView_IssueImage", src=True)
        # A missing cover image should not abort the whole download.
        if img is not None:
            self.cover_url = img['src']
        feeds = []
        parent = soup.find(id='content-left-big')
        for txt in parent.findAll(attrs={'class':'txt_normal_red strong'}):
            section = self.tag_to_string(txt).capitalize()
            articles = list(self.find_articles(txt))
            feeds.append((section, articles))
        return feeds

    def find_articles(self, txt):
        '''
        Yield an article dict for each matching element after the section
        header ``txt``, stopping at the first matching <div>.
        '''
        for a in txt.findAllNext( attrs={'class':['strong','hr']}):
            # The original tested `a.name in "div"`, a substring check that
            # also matches tags named 'i', 'd' or 'v'.
            if a.name == 'div':
                break
            yield {
                'title' : self.tag_to_string(a),
                'url' : 'http://www.newsweek.pl'+a['href'],
                'date' : '',
                'description' : ''
            }
|
||||
|
||||
|
@ -6,6 +6,8 @@ __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||
www.nikkei.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class NikkeiNet(BasicNewsRecipe):
|
||||
title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(Free)'
|
||||
__author__ = 'Hiroshi Miura'
|
||||
@ -15,34 +17,34 @@ class NikkeiNet(BasicNewsRecipe):
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 20
|
||||
language = 'ja'
|
||||
|
||||
|
||||
feeds = [ (u'\u65e5\u7d4c\u4f01\u696d', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sangyo'),
|
||||
(u'\u65e5\u7d4c\u88fd\u54c1', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=newpro'),
|
||||
(u'internet', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=internet'),
|
||||
(u'\u653f\u6cbb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=seiji'),
|
||||
(u'\u8ca1\u52d9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zaimu'),
|
||||
(u'\u8ca1\u52d9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zaimu'),
|
||||
(u'\u7d4c\u6e08', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keizai'),
|
||||
(u'\u56fd\u969b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kaigai'),
|
||||
(u'\u79d1\u5b66', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kagaku'),
|
||||
(u'\u30de\u30fc\u30b1\u30c3\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=market'),
|
||||
(u'\u304f\u3089\u3057', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kurashi'),
|
||||
(u'\u304f\u3089\u3057', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kurashi'),
|
||||
(u'\u30b9\u30dd\u30fc\u30c4', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sports'),
|
||||
(u'\u793e\u4f1a', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai'),
|
||||
(u'\u793e\u4f1a', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai'),
|
||||
(u'\u30a8\u30b3', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=eco'),
|
||||
(u'\u5065\u5eb7', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kenkou'),
|
||||
(u'\u96c7\u7528', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=koyou'),
|
||||
(u'\u5065\u5eb7', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kenkou'),
|
||||
(u'\u96c7\u7528', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=koyou'),
|
||||
(u'\u6559\u80b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kyouiku'),
|
||||
(u'\u304a\u304f\u3084\u307f', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=okuyami'),
|
||||
(u'\u4eba\u4e8b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zinzi'),
|
||||
(u'\u7279\u96c6', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=special'),
|
||||
(u'\u5730\u57df\u30cb\u30e5\u30fc\u30b9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=local'),
|
||||
(u'\u7d71\u8a08\u30fb\u767d\u66f8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=report'),
|
||||
(u'\u30e9\u30f3\u30ad\u30f3\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=ranking'),
|
||||
(u'\u30e9\u30f3\u30ad\u30f3\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=ranking'),
|
||||
(u'\u4f1a\u898b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=interview'),
|
||||
(u'\u793e\u8aac\u30fb\u6625\u79cb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shasetsu'),
|
||||
(u'\u793e\u8aac\u30fb\u6625\u79cb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shasetsu'),
|
||||
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30d7\u30ed\u91ce\u7403', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=baseball'),
|
||||
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u5927\u30ea\u30fc\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=mlb'),
|
||||
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b5\u30c3\u30ab\u30fc', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=soccer'),
|
||||
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b5\u30c3\u30ab\u30fc', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=soccer'),
|
||||
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b4\u30eb\u30d5', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=golf'),
|
||||
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u76f8\u64b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sumou'),
|
||||
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u7af6\u99ac', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keiba'),
|
||||
|
125
resources/recipes/nikkei_sub.recipe
Normal file
@ -0,0 +1,125 @@
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import mechanize
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
|
||||
|
||||
class NikkeiNet_subscription(BasicNewsRecipe):
    '''
    Recipe for the subscription edition of www.nikkei.com.

    Articles come from the RSS feeds listed in `feeds`; get_browser() logs
    in with the configured username/password before anything is fetched.
    '''
    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248'
    __author__ = 'Hiroshi Miura'
    description = 'News and current market affairs from Japan'
    needs_subscription = True
    oldest_article = 2
    max_articles_per_feed = 20
    language = 'ja'
    remove_javascript = False
    # Temporary files created while logging in (the hand-written cookie file).
    temp_files = []

    remove_tags_before = {'class':"cmn-section cmn-indent"}
    remove_tags = [
        {'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
        {'class':"cmn-article_keyword cmn-clearfix"},
        {'class':"cmn-print_headline cmn-clearfix"},
        ]
    remove_tags_after = {'class':"cmn-pr_list"}

    def get_browser(self):
        '''
        Return a browser that is logged in to nikkei.com when credentials
        are configured; otherwise return the default browser with an empty
        LWP cookie jar attached.

        Raises ValueError when the post-login page does not contain the
        expected "checkValue" script variable.
        '''
        br = BasicNewsRecipe.get_browser()

        cj = mechanize.LWPCookieJar()
        br.set_cookiejar(cj)

        # For troubleshooting, enable br.set_debug_http(True),
        # br.set_debug_redirects(True) and br.set_debug_responses(True) here.

        if self.username is not None and self.password is not None:
            # open login form
            br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
            response = br.response()

            # remove disabled input which brings error on mechanize: the
            # markup from that <input> up to the end of the gm_home_on.gif
            # tag is turned into an HTML comment before the form is parsed.
            response.set_data(response.get_data().replace("<input id=\"j_id48\"", "<!-- "))
            response.set_data(response.get_data().replace("gm_home_on.gif\" />", " -->"))
            br.set_response(response)

            # fill in and send the login form
            br.select_form(name='LA0010Form01')
            br['LA0010Form01:LA0010Email'] = self.username
            br['LA0010Form01:LA0010Password'] = self.password
            br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
            br.submit()

            # open news site; it answers with an auto redirect form
            br.open('http://www.nikkei.com/')

            # forced redirect in default
            br.select_form(nr=0)
            br.submit()
            # the response carries a cookie value which should be set by Javascript
            raw = br.response().get_data()

            # grab cookie from JS and set it
            match = re.search(r"var checkValue = '(\d+)';", raw, re.M)
            if match is None:
                # Fail with a readable message instead of an AttributeError
                # on None; the usual cause is presumably a failed login.
                raise ValueError('nikkei.com: "checkValue" not found in the redirect page; login may have failed')
            redirectflag = match.group(1)
            br.select_form(nr=0)

            # Write the two cookies in LWP cookie-file format and load the
            # file into the jar, since mechanize does not run the Javascript
            # that would normally set them.
            cookie_file = PersistentTemporaryFile('_fa.html')
            self.temp_files.append(cookie_file)
            cookie_file.write("#LWP-Cookies-2.0\n")
            cookie_file.write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
            cookie_file.write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
            cookie_file.close()
            cj.load(cookie_file.name)

            br.submit()

        return br

    feeds = [ (u'\u65e5\u7d4c\u4f01\u696d', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sangyo'),
              (u'\u65e5\u7d4c\u88fd\u54c1', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=newpro'),
              (u'internet', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=internet'),
              (u'\u653f\u6cbb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=seiji'),
              (u'\u8ca1\u52d9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zaimu'),
              (u'\u7d4c\u6e08', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keizai'),
              (u'\u56fd\u969b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kaigai'),
              (u'\u79d1\u5b66', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kagaku'),
              (u'\u30de\u30fc\u30b1\u30c3\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=market'),
              (u'\u304f\u3089\u3057', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kurashi'),
              (u'\u30b9\u30dd\u30fc\u30c4', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sports'),
              (u'\u793e\u4f1a', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai'),
              (u'\u30a8\u30b3', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=eco'),
              (u'\u5065\u5eb7', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kenkou'),
              (u'\u96c7\u7528', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=koyou'),
              (u'\u6559\u80b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kyouiku'),
              (u'\u304a\u304f\u3084\u307f', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=okuyami'),
              (u'\u4eba\u4e8b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zinzi'),
              (u'\u7279\u96c6', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=special'),
              (u'\u5730\u57df\u30cb\u30e5\u30fc\u30b9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=local'),
              (u'\u7d71\u8a08\u30fb\u767d\u66f8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=report'),
              (u'\u30e9\u30f3\u30ad\u30f3\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=ranking'),
              (u'\u4f1a\u898b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=interview'),
              (u'\u793e\u8aac\u30fb\u6625\u79cb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shasetsu'),
              (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30d7\u30ed\u91ce\u7403', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=baseball'),
              (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u5927\u30ea\u30fc\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=mlb'),
              (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b5\u30c3\u30ab\u30fc', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=soccer'),
              (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b4\u30eb\u30d5', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=golf'),
              (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u76f8\u64b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sumou'),
              (u'\u30b9\u30dd\u30fc\u30c4\uff1a\u7af6\u99ac', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keiba'),
              (u'\u8abf\u67fb\u30fb\u30a2\u30f3\u30b1\u30fc\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=research')
              ]
|
||||
|
||||
|
||||
|
@ -6,13 +6,11 @@ __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||
www.nikkei.com
|
||||
'''
|
||||
|
||||
import string, re, sys
|
||||
from calibre import strftime
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import mechanize
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
|
||||
|
||||
class NikkeiNet_sub_economy(BasicNewsRecipe):
|
||||
title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u7d4c\u6e08)'
|
||||
__author__ = 'Hiroshi Miura'
|
||||
@ -35,15 +33,15 @@ class NikkeiNet_sub_economy(BasicNewsRecipe):
|
||||
remove_tags_after = {'class':"cmn-pr_list"}
|
||||
|
||||
feeds = [ (u'\u653f\u6cbb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=seiji'),
|
||||
(u'\u8ca1\u52d9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zaimu'),
|
||||
(u'\u8ca1\u52d9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zaimu'),
|
||||
(u'\u7d4c\u6e08', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keizai'),
|
||||
(u'\u30de\u30fc\u30b1\u30c3\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=market'),
|
||||
(u'\u96c7\u7528', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=koyou'),
|
||||
(u'\u96c7\u7528', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=koyou'),
|
||||
(u'\u6559\u80b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kyouiku'),
|
||||
(u'\u304a\u304f\u3084\u307f', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=okuyami'),
|
||||
(u'\u4eba\u4e8b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zinzi'),
|
||||
]
|
||||
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
|
||||
@ -70,12 +68,12 @@ class NikkeiNet_sub_economy(BasicNewsRecipe):
|
||||
br['LA0010Form01:LA0010Password'] = self.password
|
||||
br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
|
||||
br.submit()
|
||||
response1 = br.response()
|
||||
br.response()
|
||||
#print "----------------------------send login form---------------------------------------------"
|
||||
#print "----------------------------open news main page-----------------------------------------"
|
||||
# open news site
|
||||
br.open('http://www.nikkei.com/')
|
||||
response2 = br.response()
|
||||
br.response()
|
||||
#print "----------------------------www.nikkei.com BODY --------------------------------------"
|
||||
#print response2.get_data()
|
||||
#print "-------------------------^^-got auto redirect form----^^--------------------------------"
|
||||
|
@ -6,8 +6,7 @@ __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||
www.nikkei.com
|
||||
'''
|
||||
|
||||
import string, re, sys
|
||||
from calibre import strftime
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import mechanize
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
@ -41,7 +40,7 @@ class NikkeiNet_sub_industory(BasicNewsRecipe):
|
||||
(u'\u79d1\u5b66', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kagaku'),
|
||||
|
||||
]
|
||||
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
|
||||
@ -68,12 +67,12 @@ class NikkeiNet_sub_industory(BasicNewsRecipe):
|
||||
br['LA0010Form01:LA0010Password'] = self.password
|
||||
br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
|
||||
br.submit()
|
||||
response1 = br.response()
|
||||
br.response()
|
||||
#print "----------------------------send login form---------------------------------------------"
|
||||
#print "----------------------------open news main page-----------------------------------------"
|
||||
# open news site
|
||||
br.open('http://www.nikkei.com/')
|
||||
response2 = br.response()
|
||||
br.response()
|
||||
#print "----------------------------www.nikkei.com BODY --------------------------------------"
|
||||
#print response2.get_data()
|
||||
#print "-------------------------^^-got auto redirect form----^^--------------------------------"
|
||||
|
@ -6,8 +6,7 @@ __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||
www.nikkei.com
|
||||
'''
|
||||
|
||||
import string, re, sys
|
||||
from calibre import strftime
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import mechanize
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
@ -34,15 +33,15 @@ class NikkeiNet_sub_life(BasicNewsRecipe):
|
||||
]
|
||||
remove_tags_after = {'class':"cmn-pr_list"}
|
||||
|
||||
feeds = [ (u'\u304f\u3089\u3057', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kurashi'),
|
||||
feeds = [ (u'\u304f\u3089\u3057', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kurashi'),
|
||||
(u'\u30b9\u30dd\u30fc\u30c4', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sports'),
|
||||
(u'\u793e\u4f1a', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai'),
|
||||
(u'\u793e\u4f1a', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai'),
|
||||
(u'\u30a8\u30b3', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=eco'),
|
||||
(u'\u5065\u5eb7', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kenkou'),
|
||||
(u'\u5065\u5eb7', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kenkou'),
|
||||
(u'\u7279\u96c6', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=special'),
|
||||
(u'\u30e9\u30f3\u30ad\u30f3\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=ranking')
|
||||
]
|
||||
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
|
||||
@ -69,12 +68,12 @@ class NikkeiNet_sub_life(BasicNewsRecipe):
|
||||
br['LA0010Form01:LA0010Password'] = self.password
|
||||
br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
|
||||
br.submit()
|
||||
response1 = br.response()
|
||||
br.response()
|
||||
#print "----------------------------send login form---------------------------------------------"
|
||||
#print "----------------------------open news main page-----------------------------------------"
|
||||
# open news site
|
||||
br.open('http://www.nikkei.com/')
|
||||
response2 = br.response()
|
||||
br.response()
|
||||
#print "----------------------------www.nikkei.com BODY --------------------------------------"
|
||||
#print response2.get_data()
|
||||
#print "-------------------------^^-got auto redirect form----^^--------------------------------"
|
||||
|
@ -6,8 +6,7 @@ __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||
www.nikkei.com
|
||||
'''
|
||||
|
||||
import string, re, sys
|
||||
from calibre import strftime
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import mechanize
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
@ -35,7 +34,7 @@ class NikkeiNet_sub_main(BasicNewsRecipe):
|
||||
remove_tags_after = {'class':"cmn-pr_list"}
|
||||
|
||||
feeds = [ (u'NIKKEI', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=main')]
|
||||
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
|
||||
@ -62,12 +61,12 @@ class NikkeiNet_sub_main(BasicNewsRecipe):
|
||||
br['LA0010Form01:LA0010Password'] = self.password
|
||||
br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
|
||||
br.submit()
|
||||
response1 = br.response()
|
||||
br.response()
|
||||
#print "----------------------------send login form---------------------------------------------"
|
||||
#print "----------------------------open news main page-----------------------------------------"
|
||||
# open news site
|
||||
br.open('http://www.nikkei.com/')
|
||||
response2 = br.response()
|
||||
br.response()
|
||||
#print "----------------------------www.nikkei.com BODY --------------------------------------"
|
||||
#print response2.get_data()
|
||||
#print "-------------------------^^-got auto redirect form----^^--------------------------------"
|
||||
|
@ -6,8 +6,7 @@ __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||
www.nikkei.com
|
||||
'''
|
||||
|
||||
import string, re, sys
|
||||
from calibre import strftime
|
||||
import re
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
import mechanize
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
@ -34,15 +33,15 @@ class NikkeiNet_sub_sports(BasicNewsRecipe):
|
||||
]
|
||||
remove_tags_after = {'class':"cmn-pr_list"}
|
||||
|
||||
feeds = [
|
||||
feeds = [
|
||||
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30d7\u30ed\u91ce\u7403', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=baseball'),
|
||||
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u5927\u30ea\u30fc\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=mlb'),
|
||||
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b5\u30c3\u30ab\u30fc', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=soccer'),
|
||||
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b5\u30c3\u30ab\u30fc', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=soccer'),
|
||||
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b4\u30eb\u30d5', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=golf'),
|
||||
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u76f8\u64b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sumou'),
|
||||
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u7af6\u99ac', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keiba')
|
||||
]
|
||||
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser()
|
||||
|
||||
@ -69,12 +68,12 @@ class NikkeiNet_sub_sports(BasicNewsRecipe):
|
||||
br['LA0010Form01:LA0010Password'] = self.password
|
||||
br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
|
||||
br.submit()
|
||||
response1 = br.response()
|
||||
br.response()
|
||||
#print "----------------------------send login form---------------------------------------------"
|
||||
#print "----------------------------open news main page-----------------------------------------"
|
||||
# open news site
|
||||
br.open('http://www.nikkei.com/')
|
||||
response2 = br.response()
|
||||
br.response()
|
||||
#print "----------------------------www.nikkei.com BODY --------------------------------------"
|
||||
#print response2.get_data()
|
||||
#print "-------------------------^^-got auto redirect form----^^--------------------------------"
|
||||
|
36
resources/recipes/now_toronto.recipe
Normal file
@ -0,0 +1,36 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
#Based on Lars Jacob's Taz Digiabo recipe
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Starson17'
|
||||
|
||||
import os, urllib2, zipfile
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
|
||||
class NowToronto(BasicNewsRecipe):
    '''
    Recipe for Now Toronto. Instead of scraping articles it downloads the
    EPUB linked from the publisher's feed and unpacks it into the output
    directory.
    '''
    title = u'Now Toronto'
    description = u'Now Toronto'
    __author__ = 'Starson17'
    language = 'en_CA'
    conversion_options = {
        'no_default_epub_cover' : True
    }

    def build_index(self):
        '''
        Download the EPUB referenced by the first feedburner:origlink element
        of the NowEpubEditions feed, extract it into self.output_dir and
        return the path of the extracted content.opf.
        '''
        epub_feed = "http://feeds.feedburner.com/NowEpubEditions"
        soup = self.index_to_soup(epub_feed)
        url = soup.find(name = 'feedburner:origlink').string

        f = urllib2.urlopen(url)
        try:
            tmp = PersistentTemporaryFile(suffix='.epub')
            self.report_progress(0,_('downloading epub'))
            tmp.write(f.read())
            tmp.close()
        finally:
            # Release the HTTP connection even if the download fails.
            f.close()

        zfile = zipfile.ZipFile(tmp.name, 'r')
        try:
            self.report_progress(0,_('extracting epub'))
            zfile.extractall(self.output_dir)
        finally:
            # Close the archive handle so the temporary file is not kept open.
            zfile.close()

        index = os.path.join(self.output_dir, 'content.opf')
        self.report_progress(1,_('epub downloaded and extracted'))
        return index
|
@ -7,14 +7,22 @@ nytimes.com
|
||||
'''
|
||||
import re, string, time
|
||||
from calibre import entity_to_unicode, strftime
|
||||
from datetime import timedelta, date
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup
|
||||
|
||||
|
||||
class NYTimes(BasicNewsRecipe):
|
||||
|
||||
# set headlinesOnly to True for the headlines-only version
|
||||
# set headlinesOnly to True for the headlines-only version. If True, webEdition is ignored.
|
||||
headlinesOnly = True
|
||||
|
||||
# set webEdition to True for the Web edition of the newspaper. Set oldest_article to the
|
||||
# number of days old an article can be for inclusion. If oldest_article = 0 all articles
|
||||
# will be included. Note: oldest_article is ignored if webEdition = False
|
||||
webEdition = False
|
||||
oldest_article = 7
|
||||
|
||||
# includeSections: List of sections to include. If empty, all sections found will be included.
|
||||
# Otherwise, only the sections named will be included. For example,
|
||||
#
|
||||
@ -39,20 +47,76 @@ class NYTimes(BasicNewsRecipe):
|
||||
# from an article (if one exists). If one_picture_per_article = True, the image
|
||||
# will be moved to a location between the headline and the byline.
|
||||
# If one_picture_per_article = False, all images from the article will be included
|
||||
|
||||
# and shown in their original location.
|
||||
one_picture_per_article = True
|
||||
one_picture_per_article = False
|
||||
|
||||
# The maximum number of articles that will be downloaded
|
||||
max_articles_per_feed = 100
|
||||
|
||||
# Whether to omit duplicates of articles (typically arising when articles are indexed in
|
||||
# more than one section). If True, only the first occurrence will be downloaded.
|
||||
filterDuplicates = True
|
||||
|
||||
# Sections to collect for the Web edition.
|
||||
# Delete any you don't want, or use includeSections or excludeSections
|
||||
web_sections = [(u'World',u'world'),
|
||||
(u'U.S.',u'national'),
|
||||
(u'Politics',u'politics'),
|
||||
(u'New York',u'nyregion'),
|
||||
(u'Business','business'),
|
||||
(u'Technology',u'technology'),
|
||||
(u'Sports',u'sports'),
|
||||
(u'Science',u'science'),
|
||||
(u'Health',u'health'),
|
||||
(u'Opinion',u'opinion'),
|
||||
(u'Arts',u'arts'),
|
||||
(u'Books',u'books'),
|
||||
(u'Movies',u'movies'),
|
||||
(u'Music',u'arts/music'),
|
||||
(u'Television',u'arts/television'),
|
||||
(u'Style',u'style'),
|
||||
(u'Dining & Wine',u'dining'),
|
||||
(u'Fashion & Style',u'fashion'),
|
||||
(u'Home & Garden',u'garden'),
|
||||
(u'Travel',u'travel'),
|
||||
('Education',u'education'),
|
||||
('Multimedia',u'multimedia'),
|
||||
(u'Obituaries',u'obituaries'),
|
||||
(u'Sunday Magazine',u'magazine'),
|
||||
(u'Week in Review',u'weekinreview')]
|
||||
|
||||
|
||||
if headlinesOnly:
|
||||
title='New York Times Headlines'
|
||||
description = 'Headlines from the New York Times'
|
||||
needs_subscription = False
|
||||
elif webEdition:
|
||||
title='New York Times (Web)'
|
||||
description = 'New York Times on the Web'
|
||||
needs_subscription = True
|
||||
else:
|
||||
title='New York Times'
|
||||
description = 'Today\'s New York Times'
|
||||
needs_subscription = True
|
||||
|
||||
|
||||
month_list = ['january','february','march','april','may','june','july','august','september','october','november','december']
|
||||
|
||||
def decode_us_date(self,datestr):
    '''
    Convert a date written as "<month name> <day> <year>" into a
    datetime.date.

    The string is stripped, lower-cased and split on whitespace. The month
    is looked up in self.month_list; the day and year tokens must be bare
    integers (int() raises ValueError otherwise, e.g. for "19,").

    Returns today's date when the month name is not recognised or when the
    day/month/year combination is not a valid calendar date.
    '''
    udate = datestr.strip().lower().split()
    try:
        m = self.month_list.index(udate[0])+1
    except Exception:
        return date.today()
    d = int(udate[1])
    y = int(udate[2])
    try:
        d = date(y,m,d)
    except Exception:
        # Call today(); the previous code assigned the method object itself,
        # so callers received a function instead of a date.
        d = date.today()
    return d
|
||||
|
||||
earliest_date = date.today() - timedelta(days=oldest_article)
|
||||
|
||||
__author__ = 'GRiker/Kovid Goyal/Nick Redding'
|
||||
language = 'en'
|
||||
@ -136,6 +200,12 @@ class NYTimes(BasicNewsRecipe):
|
||||
.image {text-align: center;}
|
||||
.source {text-align: left; }'''
|
||||
|
||||
|
||||
articles = {}
|
||||
key = None
|
||||
ans = []
|
||||
url_list = []
|
||||
|
||||
def filter_ans(self, ans) :
|
||||
total_article_count = 0
|
||||
idx = 0
|
||||
@ -164,6 +234,29 @@ class NYTimes(BasicNewsRecipe):
|
||||
self.log( "Queued %d articles" % total_article_count )
|
||||
return ans
|
||||
|
||||
def exclude_url(self,url):
    '''
    Return True when url should be skipped, False when it may be queued.

    A URL is kept only if it starts with "http", ends with ".html",
    contains "nytimes.com" and contains none of the fragments listed in
    `unwanted` below.
    '''
    looks_like_article = (
        url.startswith("http")
        and url.endswith(".html")
        and 'nytimes.com' in url
    )
    if not looks_like_article:
        return True

    # Path fragments identifying content that is not a plain article page.
    unwanted = (
        'podcast',
        '/video/',
        '/slideshow/',
        '/magazine/index',
        '/interactive/',
        '/reference/',
        '/premium/',
    )
    return any(fragment in url for fragment in unwanted)
|
||||
|
||||
def fixChars(self,string):
|
||||
# Replace lsquo (\x91)
|
||||
fixed = re.sub("\x91","‘",string)
|
||||
@ -249,7 +342,6 @@ class NYTimes(BasicNewsRecipe):
|
||||
return BeautifulSoup(_raw, markupMassage=massage)
|
||||
|
||||
# Entry point
|
||||
print "index_to_soup()"
|
||||
soup = get_the_soup( self.encoding, url_or_raw )
|
||||
contentType = soup.find(True,attrs={'http-equiv':'Content-Type'})
|
||||
docEncoding = str(contentType)[str(contentType).find('charset=') + len('charset='):str(contentType).rfind('"')]
|
||||
@ -273,83 +365,110 @@ class NYTimes(BasicNewsRecipe):
|
||||
else:
|
||||
return description
|
||||
|
||||
def parse_todays_index(self):
|
||||
def feed_title(self,div):
|
||||
return ''.join(div.findAll(text=True, recursive=True)).strip()
|
||||
|
||||
def feed_title(div):
|
||||
return ''.join(div.findAll(text=True, recursive=True)).strip()
|
||||
|
||||
articles = {}
|
||||
key = None
|
||||
ans = []
|
||||
url_list = []
|
||||
|
||||
def handle_article(div):
|
||||
a = div.find('a', href=True)
|
||||
if not a:
|
||||
def handle_article(self,div):
|
||||
thumbnail = div.find('div','thumbnail')
|
||||
if thumbnail:
|
||||
thumbnail.extract()
|
||||
a = div.find('a', href=True)
|
||||
if not a:
|
||||
return
|
||||
url = re.sub(r'\?.*', '', a['href'])
|
||||
if self.exclude_url(url):
|
||||
return
|
||||
url += '?pagewanted=all'
|
||||
if self.filterDuplicates:
|
||||
if url in self.url_list:
|
||||
return
|
||||
url = re.sub(r'\?.*', '', a['href'])
|
||||
if not url.startswith("http"):
|
||||
return
|
||||
if not url.endswith(".html"):
|
||||
return
|
||||
if 'podcast' in url:
|
||||
return
|
||||
if '/video/' in url:
|
||||
return
|
||||
url += '?pagewanted=all'
|
||||
if url in url_list:
|
||||
return
|
||||
url_list.append(url)
|
||||
title = self.tag_to_string(a, use_alt=True).strip()
|
||||
description = ''
|
||||
pubdate = strftime('%a, %d %b')
|
||||
summary = div.find(True, attrs={'class':'summary'})
|
||||
if summary:
|
||||
description = self.tag_to_string(summary, use_alt=False)
|
||||
author = ''
|
||||
self.url_list.append(url)
|
||||
title = self.tag_to_string(a, use_alt=True).strip()
|
||||
description = ''
|
||||
pubdate = strftime('%a, %d %b')
|
||||
summary = div.find(True, attrs={'class':'summary'})
|
||||
if summary:
|
||||
description = self.tag_to_string(summary, use_alt=False)
|
||||
author = ''
|
||||
authorAttribution = div.find(True, attrs={'class':'byline'})
|
||||
if authorAttribution:
|
||||
author = self.tag_to_string(authorAttribution, use_alt=False)
|
||||
else:
|
||||
authorAttribution = div.find(True, attrs={'class':'byline'})
|
||||
if authorAttribution:
|
||||
author = self.tag_to_string(authorAttribution, use_alt=False)
|
||||
else:
|
||||
authorAttribution = div.find(True, attrs={'class':'byline'})
|
||||
if authorAttribution:
|
||||
author = self.tag_to_string(authorAttribution, use_alt=False)
|
||||
feed = key if key is not None else 'Uncategorized'
|
||||
if not articles.has_key(feed):
|
||||
ans.append(feed)
|
||||
articles[feed] = []
|
||||
articles[feed].append(
|
||||
dict(title=title, url=url, date=pubdate,
|
||||
description=description, author=author,
|
||||
content=''))
|
||||
feed = self.key if self.key is not None else 'Uncategorized'
|
||||
if not self.articles.has_key(feed):
|
||||
self.ans.append(feed)
|
||||
self.articles[feed] = []
|
||||
self.articles[feed].append(
|
||||
dict(title=title, url=url, date=pubdate,
|
||||
description=description, author=author,
|
||||
content=''))
|
||||
|
||||
|
||||
def parse_web_edition(self):
    """Build the article index for the Web edition.

    Walks every (section title, index path) pair in self.web_sections,
    skipping sections filtered out by self.includeSections /
    self.excludeSections, fetches the section index page and hands each
    story element to self.handle_article with self.key set to the section
    title.

    Returns the result of self.filter_ans() applied to a list of
    (section name, articles) tuples. Note that self.ans is rebound from a
    list of section names to that list of tuples.
    """
    story_classes = ['story', 'story headline']
    wanted_classes = ['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']

    for (sec_title,index_url) in self.web_sections:
        # An empty includeSections list means "include everything".
        if self.includeSections != [] and sec_title not in self.includeSections:
            self.log("SECTION NOT INCLUDED: %s" % sec_title)
            continue
        if sec_title in self.excludeSections:
            self.log("SECTION EXCLUDED: %s" % sec_title)
            continue

        index_page = 'http://www.nytimes.com/pages/'+index_url+'/index.html'
        self.log('Index URL: '+index_page)
        soup = self.index_to_soup(index_page)
        # handle_article files articles under self.key.
        self.key = sec_title

        # Find each article
        for div in soup.findAll(True, attrs={'class':wanted_classes}):
            if div['class'] in story_classes:
                self.handle_article(div)
            elif div['class'] == 'headlinesOnly multiline flush':
                # Headline-only lists carry one article per <li>.
                for lidiv in div.findAll('li'):
                    self.handle_article(lidiv)

    self.ans = [(k, self.articles[k]) for k in self.ans if k in self.articles]
    return self.filter_ans(self.ans)
|
||||
|
||||
|
||||
def parse_todays_index(self):
|
||||
|
||||
soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
|
||||
|
||||
|
||||
skipping = False
|
||||
# Find each article
|
||||
for div in soup.findAll(True,
|
||||
attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
|
||||
|
||||
if div['class'] in ['section-headline','sectionHeader']:
|
||||
key = string.capwords(feed_title(div))
|
||||
key = key.replace('Op-ed','Op-Ed')
|
||||
key = key.replace('U.s.','U.S.')
|
||||
self.key = string.capwords(self.feed_title(div))
|
||||
self.key = self.key.replace('Op-ed','Op-Ed')
|
||||
self.key = self.key.replace('U.s.','U.S.')
|
||||
self.key = self.key.replace('N.y.','N.Y.')
|
||||
skipping = False
|
||||
if self.includeSections != []:
|
||||
if self.key not in self.includeSections:
|
||||
print "SECTION NOT INCLUDED: ",self.key
|
||||
skipping = True
|
||||
if self.key in self.excludeSections:
|
||||
print "SECTION EXCLUDED: ",self.key
|
||||
skipping = True
|
||||
|
||||
elif div['class'] in ['story', 'story headline'] :
|
||||
handle_article(div)
|
||||
if not skipping:
|
||||
self.handle_article(div)
|
||||
elif div['class'] == 'headlinesOnly multiline flush':
|
||||
for lidiv in div.findAll('li'):
|
||||
handle_article(lidiv)
|
||||
if not skipping:
|
||||
self.handle_article(lidiv)
|
||||
|
||||
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
|
||||
return self.filter_ans(ans)
|
||||
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
|
||||
return self.filter_ans(self.ans)
|
||||
|
||||
def parse_headline_index(self):
|
||||
|
||||
articles = {}
|
||||
ans = []
|
||||
url_list = []
|
||||
|
||||
soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')
|
||||
|
||||
# Fetch the content table
|
||||
@ -363,15 +482,24 @@ class NYTimes(BasicNewsRecipe):
|
||||
for td_col in content_table.findAll('td', {'id' : re.compile('Column')}):
|
||||
for div_sec in td_col.findAll('div',recursive=False):
|
||||
for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}):
|
||||
|
||||
section_name = self.tag_to_string(h6_sec_name,use_alt=False)
|
||||
section_name = re.sub(r'^ *$','',section_name)
|
||||
|
||||
if section_name == '':
|
||||
continue
|
||||
if self.includeSections != []:
|
||||
if section_name not in self.includeSections:
|
||||
print "SECTION NOT INCLUDED: ",section_name
|
||||
continue
|
||||
if section_name in self.excludeSections:
|
||||
print "SECTION EXCLUDED: ",section_name
|
||||
continue
|
||||
|
||||
section_name=string.capwords(section_name)
|
||||
if section_name == 'U.s.':
|
||||
section_name = 'U.S.'
|
||||
elif section_name == 'Op-ed':
|
||||
section_name = 'Op-Ed'
|
||||
section_name = section_name.replace('Op-ed','Op-Ed')
|
||||
section_name = section_name.replace('U.s.','U.S.')
|
||||
section_name = section_name.replace('N.y.','N.Y.')
|
||||
pubdate = strftime('%a, %d %b')
|
||||
|
||||
search_div = div_sec
|
||||
@ -392,37 +520,32 @@ class NYTimes(BasicNewsRecipe):
|
||||
if not a:
|
||||
continue
|
||||
url = re.sub(r'\?.*', '', a['href'])
|
||||
if not url.startswith("http"):
|
||||
continue
|
||||
if not url.endswith(".html"):
|
||||
continue
|
||||
if 'podcast' in url:
|
||||
continue
|
||||
if 'video' in url:
|
||||
if self.exclude_url(url):
|
||||
continue
|
||||
url += '?pagewanted=all'
|
||||
if url in url_list:
|
||||
continue
|
||||
url_list.append(url)
|
||||
self.log("URL %s" % url)
|
||||
if self.filterDuplicates:
|
||||
if url in self.url_list:
|
||||
continue
|
||||
self.url_list.append(url)
|
||||
title = self.tag_to_string(a, use_alt=True).strip()
|
||||
desc = h3_item.find('p')
|
||||
if desc is not None:
|
||||
description = self.tag_to_string(desc,use_alt=False)
|
||||
else:
|
||||
description = ''
|
||||
if not articles.has_key(section_name):
|
||||
ans.append(section_name)
|
||||
articles[section_name] = []
|
||||
articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
|
||||
if not self.articles.has_key(section_name):
|
||||
self.ans.append(section_name)
|
||||
self.articles[section_name] = []
|
||||
self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
|
||||
|
||||
|
||||
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
|
||||
return self.filter_ans(ans)
|
||||
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
|
||||
return self.filter_ans(self.ans)
|
||||
|
||||
def parse_index(self):
|
||||
if self.headlinesOnly:
|
||||
return self.parse_headline_index()
|
||||
elif self.webEdition:
|
||||
return self.parse_web_edition()
|
||||
else:
|
||||
return self.parse_todays_index()
|
||||
|
||||
@ -438,6 +561,21 @@ class NYTimes(BasicNewsRecipe):
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
|
||||
if self.webEdition & (self.oldest_article>0):
|
||||
date_tag = soup.find(True,attrs={'class': ['dateline','date']})
|
||||
if date_tag:
|
||||
date_str = self.tag_to_string(date_tag,use_alt=False)
|
||||
date_str = date_str.replace('Published:','')
|
||||
date_items = date_str.split(',')
|
||||
try:
|
||||
datestring = date_items[0]+' '+date_items[1]
|
||||
article_date = self.decode_us_date(datestring)
|
||||
except:
|
||||
article_date = date.today()
|
||||
if article_date < self.earliest_date:
|
||||
self.log("Skipping article dated %s" % date_str)
|
||||
return None
|
||||
|
||||
kicker_tag = soup.find(attrs={'class':'kicker'})
|
||||
if kicker_tag: # remove Op_Ed author head shots
|
||||
tagline = self.tag_to_string(kicker_tag)
|
||||
@ -462,7 +600,6 @@ class NYTimes(BasicNewsRecipe):
|
||||
for inlineImg in inlineImgs[1:]:
|
||||
inlineImg.extract()
|
||||
# Move firstImg before article body
|
||||
#article_body = soup.find(True, {'id':'articleBody'})
|
||||
cgFirst = soup.find(True, {'class':re.compile('columnGroup *first')})
|
||||
if cgFirst:
|
||||
# Strip all sibling NavigableStrings: noise
|
||||
@ -548,4 +685,3 @@ class NYTimes(BasicNewsRecipe):
|
||||
divTag.replaceWith(tag)
|
||||
|
||||
return soup
|
||||
|
||||
|
@ -7,14 +7,22 @@ nytimes.com
|
||||
'''
|
||||
import re, string, time
|
||||
from calibre import entity_to_unicode, strftime
|
||||
from datetime import timedelta, date
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup
|
||||
|
||||
|
||||
class NYTimes(BasicNewsRecipe):
|
||||
|
||||
# set headlinesOnly to True for the headlines-only version
|
||||
# set headlinesOnly to True for the headlines-only version. If True, webEdition is ignored.
|
||||
headlinesOnly = False
|
||||
|
||||
# set webEdition to True for the Web edition of the newspaper. Set oldest_article to the
|
||||
# number of days old an article can be for inclusion. If oldest_article = 0 all articles
|
||||
# will be included. Note: oldest_article is ignored if webEdition = False
|
||||
webEdition = False
|
||||
oldest_article = 7
|
||||
|
||||
# includeSections: List of sections to include. If empty, all sections found will be included.
|
||||
# Otherwise, only the sections named will be included. For example,
|
||||
#
|
||||
@ -39,20 +47,76 @@ class NYTimes(BasicNewsRecipe):
|
||||
# from an article (if one exists). If one_picture_per_article = True, the image
|
||||
# will be moved to a location between the headline and the byline.
|
||||
# If one_picture_per_article = False, all images from the article will be included
|
||||
|
||||
# and shown in their original location.
|
||||
one_picture_per_article = True
|
||||
one_picture_per_article = False
|
||||
|
||||
# The maximum number of articles that will be downloaded
|
||||
max_articles_per_feed = 100
|
||||
|
||||
# Whether to omit duplicates of articles (typically arising when articles are indexed in
|
||||
# more than one section). If True, only the first occurrence will be downloaded.
|
||||
filterDuplicates = True
|
||||
|
||||
# Sections to collect for the Web edition.
|
||||
# Delete any you don't want, or use includeSections or excludeSections
|
||||
web_sections = [(u'World',u'world'),
|
||||
(u'U.S.',u'national'),
|
||||
(u'Politics',u'politics'),
|
||||
(u'New York',u'nyregion'),
|
||||
(u'Business','business'),
|
||||
(u'Technology',u'technology'),
|
||||
(u'Sports',u'sports'),
|
||||
(u'Science',u'science'),
|
||||
(u'Health',u'health'),
|
||||
(u'Opinion',u'opinion'),
|
||||
(u'Arts',u'arts'),
|
||||
(u'Books',u'books'),
|
||||
(u'Movies',u'movies'),
|
||||
(u'Music',u'arts/music'),
|
||||
(u'Television',u'arts/television'),
|
||||
(u'Style',u'style'),
|
||||
(u'Dining & Wine',u'dining'),
|
||||
(u'Fashion & Style',u'fashion'),
|
||||
(u'Home & Garden',u'garden'),
|
||||
(u'Travel',u'travel'),
|
||||
('Education',u'education'),
|
||||
('Multimedia',u'multimedia'),
|
||||
(u'Obituaries',u'obituaries'),
|
||||
(u'Sunday Magazine',u'magazine'),
|
||||
(u'Week in Review',u'weekinreview')]
|
||||
|
||||
|
||||
if headlinesOnly:
|
||||
title='New York Times Headlines'
|
||||
description = 'Headlines from the New York Times'
|
||||
needs_subscription = False
|
||||
elif webEdition:
|
||||
title='New York Times (Web)'
|
||||
description = 'New York Times on the Web'
|
||||
needs_subscription = True
|
||||
else:
|
||||
title='New York Times'
|
||||
description = 'Today\'s New York Times'
|
||||
needs_subscription = True
|
||||
|
||||
|
||||
month_list = ['january','february','march','april','may','june','july','august','september','october','november','december']
|
||||
|
||||
def decode_us_date(self,datestr):
    """Convert a US-style "Month day year" string into a datetime.date.

    The month is matched case-insensitively against self.month_list (full
    English month names). Any string that cannot be decoded -- unknown
    month, missing or non-numeric day/year, or an impossible calendar date
    -- yields today's date instead of raising.
    """
    udate = datestr.strip().lower().split()
    try:
        m = self.month_list.index(udate[0])+1
        # int() and date() both signal bad input with ValueError; a short
        # string shows up as IndexError on udate[...].
        return date(int(udate[2]), m, int(udate[1]))
    except (ValueError, IndexError, OverflowError):
        # The fallback must be a date *value*, not the bound method.
        return date.today()
|
||||
|
||||
earliest_date = date.today() - timedelta(days=oldest_article)
|
||||
|
||||
__author__ = 'GRiker/Kovid Goyal/Nick Redding'
|
||||
language = 'en'
|
||||
@ -60,7 +124,6 @@ class NYTimes(BasicNewsRecipe):
|
||||
|
||||
|
||||
timefmt = ''
|
||||
needs_subscription = True
|
||||
masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
|
||||
cover_margins = (18,18,'grey99')
|
||||
|
||||
@ -137,6 +200,12 @@ class NYTimes(BasicNewsRecipe):
|
||||
.image {text-align: center;}
|
||||
.source {text-align: left; }'''
|
||||
|
||||
|
||||
articles = {}
|
||||
key = None
|
||||
ans = []
|
||||
url_list = []
|
||||
|
||||
def filter_ans(self, ans) :
|
||||
total_article_count = 0
|
||||
idx = 0
|
||||
@ -165,6 +234,29 @@ class NYTimes(BasicNewsRecipe):
|
||||
self.log( "Queued %d articles" % total_article_count )
|
||||
return ans
|
||||
|
||||
def exclude_url(self,url):
    """Return True when url should not be queued as an article.

    A URL is kept only if it starts with "http", ends with ".html",
    contains 'nytimes.com' and contains none of the blocked path fragments
    listed below.
    """
    if not (url.startswith("http") and url.endswith(".html")):
        return True
    if 'nytimes.com' not in url:
        return True
    blocked_fragments = (
        'podcast',
        '/video/',
        '/slideshow/',
        '/magazine/index',
        '/interactive/',
        '/reference/',
        '/premium/',
    )
    return any(fragment in url for fragment in blocked_fragments)
|
||||
|
||||
def fixChars(self,string):
|
||||
# Replace lsquo (\x91)
|
||||
fixed = re.sub("\x91","‘",string)
|
||||
@ -250,7 +342,6 @@ class NYTimes(BasicNewsRecipe):
|
||||
return BeautifulSoup(_raw, markupMassage=massage)
|
||||
|
||||
# Entry point
|
||||
print "index_to_soup()"
|
||||
soup = get_the_soup( self.encoding, url_or_raw )
|
||||
contentType = soup.find(True,attrs={'http-equiv':'Content-Type'})
|
||||
docEncoding = str(contentType)[str(contentType).find('charset=') + len('charset='):str(contentType).rfind('"')]
|
||||
@ -274,83 +365,110 @@ class NYTimes(BasicNewsRecipe):
|
||||
else:
|
||||
return description
|
||||
|
||||
def parse_todays_index(self):
|
||||
def feed_title(self,div):
|
||||
return ''.join(div.findAll(text=True, recursive=True)).strip()
|
||||
|
||||
def feed_title(div):
|
||||
return ''.join(div.findAll(text=True, recursive=True)).strip()
|
||||
|
||||
articles = {}
|
||||
key = None
|
||||
ans = []
|
||||
url_list = []
|
||||
|
||||
def handle_article(div):
|
||||
a = div.find('a', href=True)
|
||||
if not a:
|
||||
def handle_article(self,div):
|
||||
thumbnail = div.find('div','thumbnail')
|
||||
if thumbnail:
|
||||
thumbnail.extract()
|
||||
a = div.find('a', href=True)
|
||||
if not a:
|
||||
return
|
||||
url = re.sub(r'\?.*', '', a['href'])
|
||||
if self.exclude_url(url):
|
||||
return
|
||||
url += '?pagewanted=all'
|
||||
if self.filterDuplicates:
|
||||
if url in self.url_list:
|
||||
return
|
||||
url = re.sub(r'\?.*', '', a['href'])
|
||||
if not url.startswith("http"):
|
||||
return
|
||||
if not url.endswith(".html"):
|
||||
return
|
||||
if 'podcast' in url:
|
||||
return
|
||||
if '/video/' in url:
|
||||
return
|
||||
url += '?pagewanted=all'
|
||||
if url in url_list:
|
||||
return
|
||||
url_list.append(url)
|
||||
title = self.tag_to_string(a, use_alt=True).strip()
|
||||
description = ''
|
||||
pubdate = strftime('%a, %d %b')
|
||||
summary = div.find(True, attrs={'class':'summary'})
|
||||
if summary:
|
||||
description = self.tag_to_string(summary, use_alt=False)
|
||||
author = ''
|
||||
self.url_list.append(url)
|
||||
title = self.tag_to_string(a, use_alt=True).strip()
|
||||
description = ''
|
||||
pubdate = strftime('%a, %d %b')
|
||||
summary = div.find(True, attrs={'class':'summary'})
|
||||
if summary:
|
||||
description = self.tag_to_string(summary, use_alt=False)
|
||||
author = ''
|
||||
authorAttribution = div.find(True, attrs={'class':'byline'})
|
||||
if authorAttribution:
|
||||
author = self.tag_to_string(authorAttribution, use_alt=False)
|
||||
else:
|
||||
authorAttribution = div.find(True, attrs={'class':'byline'})
|
||||
if authorAttribution:
|
||||
author = self.tag_to_string(authorAttribution, use_alt=False)
|
||||
else:
|
||||
authorAttribution = div.find(True, attrs={'class':'byline'})
|
||||
if authorAttribution:
|
||||
author = self.tag_to_string(authorAttribution, use_alt=False)
|
||||
feed = key if key is not None else 'Uncategorized'
|
||||
if not articles.has_key(feed):
|
||||
ans.append(feed)
|
||||
articles[feed] = []
|
||||
articles[feed].append(
|
||||
dict(title=title, url=url, date=pubdate,
|
||||
description=description, author=author,
|
||||
content=''))
|
||||
feed = self.key if self.key is not None else 'Uncategorized'
|
||||
if not self.articles.has_key(feed):
|
||||
self.ans.append(feed)
|
||||
self.articles[feed] = []
|
||||
self.articles[feed].append(
|
||||
dict(title=title, url=url, date=pubdate,
|
||||
description=description, author=author,
|
||||
content=''))
|
||||
|
||||
|
||||
def parse_web_edition(self):
    """Build the article index for the Web edition.

    Walks every (section title, index path) pair in self.web_sections,
    skipping sections filtered out by self.includeSections /
    self.excludeSections, fetches the section index page and hands each
    story element to self.handle_article with self.key set to the section
    title.

    Returns the result of self.filter_ans() applied to a list of
    (section name, articles) tuples. Note that self.ans is rebound from a
    list of section names to that list of tuples.
    """
    story_classes = ['story', 'story headline']
    wanted_classes = ['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']

    for (sec_title,index_url) in self.web_sections:
        # An empty includeSections list means "include everything".
        if self.includeSections != [] and sec_title not in self.includeSections:
            self.log("SECTION NOT INCLUDED: %s" % sec_title)
            continue
        if sec_title in self.excludeSections:
            self.log("SECTION EXCLUDED: %s" % sec_title)
            continue

        index_page = 'http://www.nytimes.com/pages/'+index_url+'/index.html'
        self.log('Index URL: '+index_page)
        soup = self.index_to_soup(index_page)
        # handle_article files articles under self.key.
        self.key = sec_title

        # Find each article
        for div in soup.findAll(True, attrs={'class':wanted_classes}):
            if div['class'] in story_classes:
                self.handle_article(div)
            elif div['class'] == 'headlinesOnly multiline flush':
                # Headline-only lists carry one article per <li>.
                for lidiv in div.findAll('li'):
                    self.handle_article(lidiv)

    self.ans = [(k, self.articles[k]) for k in self.ans if k in self.articles]
    return self.filter_ans(self.ans)
|
||||
|
||||
|
||||
def parse_todays_index(self):
|
||||
|
||||
soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
|
||||
|
||||
|
||||
skipping = False
|
||||
# Find each article
|
||||
for div in soup.findAll(True,
|
||||
attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
|
||||
|
||||
if div['class'] in ['section-headline','sectionHeader']:
|
||||
key = string.capwords(feed_title(div))
|
||||
key = key.replace('Op-ed','Op-Ed')
|
||||
key = key.replace('U.s.','U.S.')
|
||||
self.key = string.capwords(self.feed_title(div))
|
||||
self.key = self.key.replace('Op-ed','Op-Ed')
|
||||
self.key = self.key.replace('U.s.','U.S.')
|
||||
self.key = self.key.replace('N.y.','N.Y.')
|
||||
skipping = False
|
||||
if self.includeSections != []:
|
||||
if self.key not in self.includeSections:
|
||||
print "SECTION NOT INCLUDED: ",self.key
|
||||
skipping = True
|
||||
if self.key in self.excludeSections:
|
||||
print "SECTION EXCLUDED: ",self.key
|
||||
skipping = True
|
||||
|
||||
elif div['class'] in ['story', 'story headline'] :
|
||||
handle_article(div)
|
||||
if not skipping:
|
||||
self.handle_article(div)
|
||||
elif div['class'] == 'headlinesOnly multiline flush':
|
||||
for lidiv in div.findAll('li'):
|
||||
handle_article(lidiv)
|
||||
if not skipping:
|
||||
self.handle_article(lidiv)
|
||||
|
||||
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
|
||||
return self.filter_ans(ans)
|
||||
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
|
||||
return self.filter_ans(self.ans)
|
||||
|
||||
def parse_headline_index(self):
|
||||
|
||||
articles = {}
|
||||
ans = []
|
||||
url_list = []
|
||||
|
||||
soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')
|
||||
|
||||
# Fetch the content table
|
||||
@ -364,15 +482,24 @@ class NYTimes(BasicNewsRecipe):
|
||||
for td_col in content_table.findAll('td', {'id' : re.compile('Column')}):
|
||||
for div_sec in td_col.findAll('div',recursive=False):
|
||||
for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}):
|
||||
|
||||
section_name = self.tag_to_string(h6_sec_name,use_alt=False)
|
||||
section_name = re.sub(r'^ *$','',section_name)
|
||||
|
||||
if section_name == '':
|
||||
continue
|
||||
if self.includeSections != []:
|
||||
if section_name not in self.includeSections:
|
||||
print "SECTION NOT INCLUDED: ",section_name
|
||||
continue
|
||||
if section_name in self.excludeSections:
|
||||
print "SECTION EXCLUDED: ",section_name
|
||||
continue
|
||||
|
||||
section_name=string.capwords(section_name)
|
||||
if section_name == 'U.s.':
|
||||
section_name = 'U.S.'
|
||||
elif section_name == 'Op-ed':
|
||||
section_name = 'Op-Ed'
|
||||
section_name = section_name.replace('Op-ed','Op-Ed')
|
||||
section_name = section_name.replace('U.s.','U.S.')
|
||||
section_name = section_name.replace('N.y.','N.Y.')
|
||||
pubdate = strftime('%a, %d %b')
|
||||
|
||||
search_div = div_sec
|
||||
@ -393,37 +520,32 @@ class NYTimes(BasicNewsRecipe):
|
||||
if not a:
|
||||
continue
|
||||
url = re.sub(r'\?.*', '', a['href'])
|
||||
if not url.startswith("http"):
|
||||
continue
|
||||
if not url.endswith(".html"):
|
||||
continue
|
||||
if 'podcast' in url:
|
||||
continue
|
||||
if 'video' in url:
|
||||
if self.exclude_url(url):
|
||||
continue
|
||||
url += '?pagewanted=all'
|
||||
if url in url_list:
|
||||
continue
|
||||
url_list.append(url)
|
||||
self.log("URL %s" % url)
|
||||
if self.filterDuplicates:
|
||||
if url in self.url_list:
|
||||
continue
|
||||
self.url_list.append(url)
|
||||
title = self.tag_to_string(a, use_alt=True).strip()
|
||||
desc = h3_item.find('p')
|
||||
if desc is not None:
|
||||
description = self.tag_to_string(desc,use_alt=False)
|
||||
else:
|
||||
description = ''
|
||||
if not articles.has_key(section_name):
|
||||
ans.append(section_name)
|
||||
articles[section_name] = []
|
||||
articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
|
||||
if not self.articles.has_key(section_name):
|
||||
self.ans.append(section_name)
|
||||
self.articles[section_name] = []
|
||||
self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
|
||||
|
||||
|
||||
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
|
||||
return self.filter_ans(ans)
|
||||
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
|
||||
return self.filter_ans(self.ans)
|
||||
|
||||
def parse_index(self):
|
||||
if self.headlinesOnly:
|
||||
return self.parse_headline_index()
|
||||
elif self.webEdition:
|
||||
return self.parse_web_edition()
|
||||
else:
|
||||
return self.parse_todays_index()
|
||||
|
||||
@ -439,6 +561,21 @@ class NYTimes(BasicNewsRecipe):
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
|
||||
if self.webEdition & (self.oldest_article>0):
|
||||
date_tag = soup.find(True,attrs={'class': ['dateline','date']})
|
||||
if date_tag:
|
||||
date_str = self.tag_to_string(date_tag,use_alt=False)
|
||||
date_str = date_str.replace('Published:','')
|
||||
date_items = date_str.split(',')
|
||||
try:
|
||||
datestring = date_items[0]+' '+date_items[1]
|
||||
article_date = self.decode_us_date(datestring)
|
||||
except:
|
||||
article_date = date.today()
|
||||
if article_date < self.earliest_date:
|
||||
self.log("Skipping article dated %s" % date_str)
|
||||
return None
|
||||
|
||||
kicker_tag = soup.find(attrs={'class':'kicker'})
|
||||
if kicker_tag: # remove Op_Ed author head shots
|
||||
tagline = self.tag_to_string(kicker_tag)
|
||||
@ -463,7 +600,6 @@ class NYTimes(BasicNewsRecipe):
|
||||
for inlineImg in inlineImgs[1:]:
|
||||
inlineImg.extract()
|
||||
# Move firstImg before article body
|
||||
#article_body = soup.find(True, {'id':'articleBody'})
|
||||
cgFirst = soup.find(True, {'class':re.compile('columnGroup *first')})
|
||||
if cgFirst:
|
||||
# Strip all sibling NavigableStrings: noise
|
||||
|
63
resources/recipes/observa_digital.recipe
Normal file
@ -0,0 +1,63 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
|
||||
'''
|
||||
observa.com.uy
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Noticias(BasicNewsRecipe):
|
||||
title = 'Observa Digital'
|
||||
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
|
||||
description = 'Noticias desde Uruguay'
|
||||
language = 'es'
|
||||
timefmt = '[%a, %d %b, %Y]'
|
||||
use_embedded_content = False
|
||||
recursion = 5
|
||||
encoding = 'utf8'
|
||||
remove_javascript = True
|
||||
no_stylesheets = True
|
||||
|
||||
oldest_article = 2
|
||||
max_articles_per_feed = 100
|
||||
keep_only_tags = [dict(id=['contenido'])]
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'id':'contenedorVinculadas'}),
|
||||
dict(name='p', attrs={'id':'nota_firma'}),
|
||||
dict(name=['object','link'])
|
||||
]
|
||||
|
||||
remove_attributes = ['width','height', 'style', 'font', 'color']
|
||||
|
||||
extra_css = '''
|
||||
h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
|
||||
h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
|
||||
h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
|
||||
p {font-family:Arial,Helvetica,sans-serif;}
|
||||
'''
|
||||
feeds = [
|
||||
(u'Actualidad', u'http://www.observa.com.uy/RSS/actualidad.xml'),
|
||||
(u'Deportes', u'http://www.observa.com.uy/RSS/deportes.xml'),
|
||||
(u'Vida', u'http://www.observa.com.uy/RSS/vida.xml'),
|
||||
(u'Ciencia y Tecnologia', u'http://www.observa.com.uy/RSS/ciencia.xml')
|
||||
]
|
||||
|
||||
def get_cover_url(self):
    """Return the cover image URL, or None when no cover image is found.

    Fetches the El Observador front-page index and looks for the <img>
    carrying usemap="#mapeo_imagenes"; its src (whitespace-stripped) is
    appended to the site root.
    """
    cover_url = None
    index = 'http://www.elobservador.com.uy/elobservador/nav_portada.asp?suplemento=dia'
    soup = self.index_to_soup(index)
    link_item = soup.find('img',attrs={'usemap':'#mapeo_imagenes'})
    # A matching <img> without a src attribute is treated as "no cover".
    src = link_item.get('src') if link_item is not None else None
    if src:
        cover_url = 'http://www.elobservador.com.uy'+src.strip()

    # Report through the recipe logger instead of a bare print to stdout.
    self.log('Cover URL: %s' % cover_url)

    return cover_url
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
    """Remove the inline style attribute from every tag that has one.

    The soup is modified in place and also returned.
    """
    styled_tags = soup.findAll(style=True)
    for styled in styled_tags:
        del styled['style']
    return soup
|
@ -21,8 +21,16 @@ class Pagina12(BasicNewsRecipe):
|
||||
use_embedded_content = False
|
||||
language = 'es'
|
||||
remove_empty_feeds = True
|
||||
publication_type = 'newspaper'
|
||||
masthead_url = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'
|
||||
extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} #autor{font-weight: bold} #fecha,#epigrafe{font-size: 0.9em; margin: 5px} #imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px } '
|
||||
extra_css = """
|
||||
body{font-family: Arial,Helvetica,sans-serif }
|
||||
img{margin-bottom: 0.4em; display:block}
|
||||
#autor{font-weight: bold}
|
||||
#fecha,#epigrafe{font-size: 0.9em; margin: 5px}
|
||||
#imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px }
|
||||
.fgprincipal{font-size: large; font-weight: bold}
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
@ -31,7 +39,11 @@ class Pagina12(BasicNewsRecipe):
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
remove_tags = [dict(name='div', attrs={'id':['volver','logo','logo_suple','fin','permalink']})]
|
||||
remove_tags = [
|
||||
dict(name=['meta','link'])
|
||||
,dict(name='div', attrs={'id':['volver','logo','logo_suple','fin','permalink']})
|
||||
]
|
||||
remove_attributes=['lang']
|
||||
|
||||
|
||||
feeds = [
|
||||
@ -65,4 +77,13 @@ class Pagina12(BasicNewsRecipe):
|
||||
def preprocess_html(self, soup):
    """Clean up an article page before conversion and return the soup.

    Three in-place passes:
      * drop every inline style attribute;
      * inside each <span id="seccion">, turn the first link into a plain
        <span> by renaming it and deleting its href/title attributes;
      * rename the first <h3> found inside each <p> to <span>.
    """
    for item in soup.findAll(style=True):
        del item['style']
    for item in soup.findAll('span', attrs={'id':'seccion'}):
        it = item.a
        # A section span without a link has nothing to neutralise.
        if it is None:
            continue
        it.name='span'
        del it['href']
        del it['title']
    for item in soup.findAll('p'):
        it = item.find('h3')
        if it is not None:
            it.name='span'
    return soup
|
70
resources/recipes/pc_lab.recipe
Normal file
@ -0,0 +1,70 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
class PCLab(BasicNewsRecipe):
|
||||
cover_url = 'http://pclab.pl/img/logo.png'
|
||||
title = u"PC Lab"
|
||||
__author__ = 'ravcio - rlelusz[at]gmail.com'
|
||||
description = u"Articles from PC Lab website"
|
||||
language = 'pl'
|
||||
oldest_article = 30.0
|
||||
max_articles_per_feed = 100
|
||||
recursions = 0
|
||||
encoding = 'iso-8859-2'
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
use_embedded_content = False
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='div', attrs={'class':['substance']})
|
||||
]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':['chapters']})
|
||||
,dict(name='div', attrs={'id':['script_bxad_slot_display_list_bxad_slot']})
|
||||
]
|
||||
|
||||
remove_tags_after = [
|
||||
dict(name='div', attrs={'class':['navigation']})
|
||||
]
|
||||
|
||||
#links to RSS feeds
|
||||
feeds = [ ('PCLab', u'http://pclab.pl/xml/artykuly.xml') ]
|
||||
|
||||
#load second and subsequent page content
|
||||
# in: soup - full page with 'next' button
|
||||
# out: appendtag - tag to which new page is to be added
|
||||
def append_page(self, soup, appendtag):
    """Append the content of every follow-up page to appendtag.

    soup      -- page that may contain a <div class="next"> with a link to
                 the next page
    appendtag -- tag that receives each following page's
                 <div class="data"> (taken from inside
                 <div class="substance">) as its last child

    Pages are followed until there is no 'next' link, the link has no
    href, a link repeats, or a fetched page lacks the expected content
    container.
    """
    current = soup
    visited = set()
    while True:
        # find the 'Next' button
        pager = current.find('div', attrs={'class':'next'})
        if pager is None:
            return

        # search for 'a' element with link to next page (exit if not found)
        a = pager.find('a')
        if a is None:
            return
        nexturl = a.get('href')
        # Stop on a missing href or on a link already followed, so a page
        # pointing at itself cannot loop forever.
        if not nexturl or nexturl in visited:
            return
        visited.add(nexturl)

        current = self.index_to_soup('http://pclab.pl/' + nexturl)

        pagetext_substance = current.find('div', attrs={'class':'substance'})
        if pagetext_substance is None:
            return
        pagetext = pagetext_substance.find('div', attrs={'class':'data'})
        if pagetext is None:
            return
        pagetext.extract()

        appendtag.insert(len(appendtag.contents), pagetext)
|
||||
|
||||
|
||||
def preprocess_html(self, soup):
    """Merge follow-up pages into the article, strip leftover divs, return soup."""
    # soup.body contains no title and no navigator, they are in soup
    self.append_page(soup, soup.body)

    # finally remove some tags
    unwanted_classes = ['tags', 'index', 'script_bxad_slot_display_list_bxad_slot', 'index first', 'zumi', 'navigation']
    for leftover in soup.findAll('div',attrs={'class':unwanted_classes}):
        leftover.extract()

    return soup
|
@ -1,13 +1,10 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
politika.rs
|
||||
'''
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
|
||||
class Politika(BasicNewsRecipe):
|
||||
title = 'Politika Online'
|
||||
@ -19,53 +16,51 @@ class Politika(BasicNewsRecipe):
|
||||
max_articles_per_feed = 100
|
||||
no_stylesheets = True
|
||||
use_embedded_content = False
|
||||
remove_javascript = True
|
||||
encoding = 'utf8'
|
||||
language = 'sr'
|
||||
|
||||
lang = 'sr-Latn-RS'
|
||||
direction = 'ltr'
|
||||
extra_css = '@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)} @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: serif1, serif} .article_description{font-family: sans1, sans-serif}'
|
||||
delay = 1
|
||||
language = 'sr'
|
||||
publication_type = 'newspaper'
|
||||
masthead_url = 'http://static.politika.co.rs/images_new/politika.gif'
|
||||
extra_css = """
|
||||
@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
|
||||
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
|
||||
body{font-family: Arial,Helvetica,sans1,sans-serif}
|
||||
h1{font-family: "Times New Roman",Times,serif1,serif}
|
||||
.articledescription{font-family: sans1, sans-serif}
|
||||
"""
|
||||
|
||||
conversion_options = {
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : lang
|
||||
, 'pretty_print' : True
|
||||
'comment' : description
|
||||
, 'tags' : category
|
||||
, 'publisher' : publisher
|
||||
, 'language' : language
|
||||
}
|
||||
|
||||
|
||||
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'content_center_border'})]
|
||||
|
||||
remove_tags = [
|
||||
dict(name='div', attrs={'class':['send_print','txt-komentar']})
|
||||
,dict(name=['object','link','a'])
|
||||
,dict(name='h1', attrs={'class':'box_header-tags'})
|
||||
]
|
||||
|
||||
|
||||
keep_only_tags = [dict(name='div', attrs={'class':'big_article_home item_details'})]
|
||||
remove_tags_after = dict(attrs={'class':'online_date'})
|
||||
remove_tags = [dict(name=['link','meta','iframe','embed','object'])]
|
||||
|
||||
feeds = [
|
||||
(u'Politika' , u'http://www.politika.rs/rubrike/Politika/index.1.lt.xml' )
|
||||
,(u'Svet' , u'http://www.politika.rs/rubrike/Svet/index.1.lt.xml' )
|
||||
,(u'Redakcijski komentari', u'http://www.politika.rs/rubrike/redakcijski-komentari/index.1.lt.xml')
|
||||
,(u'Pogledi' , u'http://www.politika.rs/pogledi/index.lt.xml' )
|
||||
,(u'Pogledi sa strane' , u'http://www.politika.rs/rubrike/Pogledi-sa-strane/index.1.lt.xml' )
|
||||
,(u'Tema dana' , u'http://www.politika.rs/rubrike/tema-dana/index.1.lt.xml' )
|
||||
,(u'Kultura' , u'http://www.politika.rs/rubrike/Kultura/index.1.lt.xml' )
|
||||
,(u'Zivot i stil' , u'http://www.politika.rs/rubrike/zivot-i-stil/index.1.lt.xml' )
|
||||
(u'Politika' , u'http://www.politika.rs/rubrike/Politika/index.1.lt.xml' )
|
||||
,(u'Svet' , u'http://www.politika.rs/rubrike/Svet/index.1.lt.xml' )
|
||||
,(u'Ostali komentari' , u'http://www.politika.rs/rubrike/ostali-komentari/index.1.lt.xml' )
|
||||
,(u'Pogledi' , u'http://www.politika.rs/pogledi/index.lt.xml' )
|
||||
,(u'Pogledi sa strane', u'http://www.politika.rs/rubrike/Pogledi-sa-strane/index.1.lt.xml')
|
||||
,(u'Tema dana' , u'http://www.politika.rs/rubrike/tema-dana/index.1.lt.xml' )
|
||||
,(u'Kultura' , u'http://www.politika.rs/rubrike/Kultura/index.1.lt.xml' )
|
||||
,(u'Spektar' , u'http://www.politika.rs/rubrike/zivot-i-stil/index.1.lt.xml' )
|
||||
]
|
||||
|
||||
def preprocess_html(self, soup):
|
||||
soup.html['lang'] = self.lang
|
||||
soup.html['dir' ] = self.direction
|
||||
mlang = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.lang)])
|
||||
soup.head.insert(0,mlang)
|
||||
for item in soup.findAll(style=True):
|
||||
del item['style']
|
||||
ftag = soup.find('div',attrs={'class':'content_center_border'})
|
||||
if ftag.has_key('align'):
|
||||
del ftag['align']
|
||||
return self.adeify_images(soup)
|
||||
for item in soup.findAll('a', attrs={'class':'category'}):
|
||||
item.name='span'
|
||||
if item.has_key('href'):
|
||||
del item['href']
|
||||
if item.has_key('title'):
|
||||
del item['title']
|
||||
return soup
|
||||
|
68
resources/recipes/polityka.recipe
Normal file
@ -0,0 +1,68 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Mateusz Kielar, matek09@gmail.com'
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Polityka(BasicNewsRecipe):
    """Recipe for the issue linked from the Polityka archive front page.

    The front page is scanned for ``box_img3`` elements; the last one
    supplies both the cover image and the link to the issue to download.
    """

    title = u'Polityka'
    __author__ = 'Mateusz Kielar'
    description = 'Weekly magazine. Last archive issue'
    encoding = 'utf-8'
    no_stylesheets = True
    # The magazine is Polish; 'en' mislabelled the generated periodical.
    language = 'pl'
    remove_javascript = True

    # Keep only what lies between the section heading and the footer box...
    remove_tags_before = dict(name='h2', attrs={'class': 'box_nag'})
    remove_tags_after = dict(name='div', attrs={'class': 'box_footer'})

    # ...and then drop those two boundary elements themselves.
    remove_tags = [
        dict(name='h2', attrs={'class': 'box_nag'}),
        dict(name='div', attrs={'class': 'box_footer'}),
    ]

    extra_css = '''
                h1 {font-size: x-large; font-weight: bold}
        '''

    def parse_index(self):
        """Build the feed list for the issue found on the archive front page.

        Returns a list of ``(section_title, articles)`` tuples, where each
        article is a dict with ``title``, ``url``, ``date`` and
        ``description`` keys. Every listing page of the issue contributes
        its own tuples, so a section name can appear once per page.
        """
        base = 'http://archiwum.polityka.pl'
        soup = self.index_to_soup('http://archiwum.polityka.pl/')
        box_img3 = soup.findAll(attrs={'class': 'box_img3'})
        feeds = []
        last = 0
        self.cover_url = base + box_img3[-1].find('img')['src']
        last_edition = base + box_img3[-1].find('a')['href']

        while True:
            index = self.index_to_soup(last_edition)

            box_list = index.findAll('div', attrs={'class': 'box_list'})
            if len(box_list) == 0:
                # A page without article boxes marks the end of the issue.
                break

            articles = {}
            for box in box_list:
                for div in box.findAll('div', attrs={'class': 'list_tresc'}):
                    article_url = base + div.a['href']
                    # The section name is only available on the article
                    # page, as the part of the heading before the first '/'.
                    article_page = self.index_to_soup(article_url)
                    heading = article_page.find('h2', attrs={'class': 'box_nag'})
                    section = self.tag_to_string(heading).split('/')[0].lstrip().rstrip()
                    articles.setdefault(section, []).append({
                        'title': self.tag_to_string(div.a),
                        'url': article_url,
                        'date': '',
                        'description': '',
                    })

            for section in articles:
                feeds.append((section, articles[section]))

            # Step to the next listing page by bumping the page number
            # embedded in the URL.
            next_edition = last_edition.replace(
                'http://archiwum.polityka.pl/wydanie/' + str(last),
                'http://archiwum.polityka.pl/wydanie/' + str(last + 1))
            if next_edition == last_edition:
                # The URL did not contain the expected page marker; without
                # this guard the same page would be fetched forever.
                break
            last_edition = next_edition
            last = last + 1

        return feeds
|
||||
|
18
resources/recipes/rds.recipe
Normal file
@ -0,0 +1,18 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1290013720(BasicNewsRecipe):
    """Recipe for the RDS hockey press-wire feed."""

    title = u'RDS'
    __author__ = 'Nexus'
    # rds.ca publishes in French (the page ids below are French too), so the
    # recipe is tagged as French rather than Canadian English.
    language = 'fr'
    description = 'Hockey News'
    oldest_article = 7
    max_articles_per_feed = 25
    no_stylesheets = True

    # Elements removed from each article page.
    remove_tags = [
        dict(name='div', attrs={'id': 'rdsWrap'}),
        dict(name='table', attrs={'id': 'aVoir'}),
        dict(name='div', attrs={'id': 'imageChronique'}),
    ]

    # Only the article header and article body containers are kept.
    keep_only_tags = [
        dict(name='div', attrs={'id': ['enteteChronique']}),
        dict(name='div', attrs={'id': ['contenuChronique']}),
    ]

    feeds = [(u'RDS', u'http://www.rds.ca/hockey/fildepresse_rds.xml')]
|
54
resources/recipes/revista_bla.recipe
Normal file
@ -0,0 +1,54 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
|
||||
'''
|
||||
http://www.revistabla.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Noticias(BasicNewsRecipe):
    """Recipe for the Revista Bla article feed (revistabla.com)."""

    title = 'Revista Bla'
    __author__ = 'Gustavo Azambuja'
    description = 'Moda | Uruguay'
    language = 'es'
    timefmt = '[%a, %d %b, %Y]'
    use_embedded_content = False
    # NOTE(review): BasicNewsRecipe spells its link-following option
    # `recursions`; this attribute is kept unchanged so download behaviour
    # stays the same - confirm whether `recursions` was intended.
    recursion = 5
    encoding = 'utf8'
    remove_javascript = True
    no_stylesheets = True

    oldest_article = 20
    max_articles_per_feed = 100
    keep_only_tags = [dict(id=['body_container'])]
    remove_tags = [
        dict(name='div', attrs={'class': ['date_text', 'comments', 'form_section', 'share_it']}),
        dict(name='div', attrs={'id': ['relatedPosts', 'spacer', 'banner_izquierda', 'right_container']}),
        dict(name='p', attrs={'class': 'FacebookLikeButton'}),
        dict(name=['object', 'link']),
    ]

    extra_css = '''
                h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
                h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
                h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
                p {font-family:Arial,Helvetica,sans-serif;}
                '''
    feeds = [
        (u'Articulos', u'http://www.revistabla.com/feed/')
    ]

    def get_cover_url(self):
        """Return the src of the image inside the front page's header_right div.

        Returns None when the div, its image, or the image's src attribute
        is missing, instead of failing on the missing element.
        """
        cover_url = None
        index = 'http://www.revistabla.com'
        soup = self.index_to_soup(index)
        link_item = soup.find('div', attrs={'class': 'header_right'})
        if link_item:
            image = link_item.img
            # The header block may exist without an <img> child.
            if image is not None:
                cover_url = image.get('src', None)
        return cover_url

    def preprocess_html(self, soup):
        """Remove every inline style attribute from the document."""
        for item in soup.findAll(style=True):
            del item['style']
        return soup
|
@ -108,3 +108,10 @@ class RevistaMuyInteresante(BasicNewsRecipe):
|
||||
feeds.append((title, articles))
|
||||
return feeds
|
||||
|
||||
def get_cover_url(self):
    """Return the absolute URL of the current cover image, or None.

    The magazine page is fetched and searched for the ``img_portada``
    image. When that image is not present the method returns None rather
    than failing on an unassigned local.
    """
    # Default result for the "no cover image on the page" case.
    cover_url = None
    index = 'http://www.muyinteresante.es/revista'
    soup = self.index_to_soup(index)
    link_item = soup.find('img', attrs={'class': 'img_portada'})
    if link_item:
        # The src attribute is concatenated directly onto the site root.
        cover_url = "http://www.muyinteresante.es" + link_item['src']
    return cover_url
|
||||
|
69
resources/recipes/rollingstone.recipe
Normal file
@ -0,0 +1,69 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
|
||||
'''
|
||||
rollingstone.com
|
||||
'''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class RollingStone(BasicNewsRecipe):
    """Recipe for the free Rolling Stone RSS feeds (news, blogs, reviews)."""

    title = 'Rolling Stone Magazine - free content'
    __author__ = 'Darko Miletic'
    description = 'Rolling Stone Magazine features music, album and artist news, movie reviews, political, economic and pop culture commentary, videos, photos, and more.'
    # The magazine's publisher is Wenner Media; "Werner" was a typo.
    publisher = 'Wenner Media inc.'
    category = 'news, music, USA, world'
    oldest_article = 15
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf8'
    use_embedded_content = False
    language = 'en'
    remove_empty_feeds = True
    publication_type = 'magazine'
    masthead_url = 'http://www.rollingstone.com/templates/rolling-stone-templates/theme/rstheme/images/rsLogo.png'
    extra_css = """
                    body{font-family: Georgia,Times,serif }
                    img{margin-bottom: 0.4em; display:block}
                """

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Applied to the raw HTML before parsing: the first pattern drops
    # whatever sits between the xml:lang attribute and <head>, the second
    # drops everything in the head after </title>.
    preprocess_regexps = [
        (re.compile(r'xml:lang="en">.*?<head>', re.DOTALL|re.IGNORECASE), lambda match: 'xml:lang="en">\n<head>\n'),
        (re.compile(r'</title>.*?</head>', re.DOTALL|re.IGNORECASE), lambda match: '</title>\n</head>\n'),
    ]

    keep_only_tags = [
        dict(attrs={'class': ['headerImgHolder', 'headerContent']}),
        dict(name='div', attrs={'id': ['teaser', 'storyTextContainer']}),
        dict(name='div', attrs={'class': 'blogDetailModule clearfix'}),
    ]

    remove_tags = [
        dict(name=['meta', 'iframe', 'object', 'embed']),
        dict(attrs={'id': 'mpStoryHeader'}),
        dict(attrs={'class': 'relatedTopics'}),
    ]
    remove_attributes = ['lang', 'onclick', 'width', 'height', 'name']
    remove_tags_before = dict(attrs={'class': 'bloggerInfo'})
    remove_tags_after = dict(attrs={'class': 'relatedTopics'})

    feeds = [
        (u'All News', u'http://www.rollingstone.com/siteServices/rss/allNews'),
        (u'All Blogs', u'http://www.rollingstone.com/siteServices/rss/allBlogs'),
        (u'Movie Reviews', u'http://www.rollingstone.com/siteServices/rss/movieReviews'),
        (u'Album Reviews', u'http://www.rollingstone.com/siteServices/rss/albumReviews'),
        (u'Song Reviews', u'http://www.rollingstone.com/siteServices/rss/songReviews'),
    ]

    def preprocess_html(self, soup):
        """Remove every inline style attribute from the document."""
        for item in soup.findAll(style=True):
            del item['style']
        return soup
|
73
resources/recipes/scprint.recipe
Normal file
@ -0,0 +1,73 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe, LoginFailed
|
||||
|
||||
class SCPrintMagazine(BasicNewsRecipe):
    """Recipe for the first issue listed in the SC Magazine issue archive.

    A subscriber login is performed in get_browser before anything is
    fetched.
    """

    title = u'SC Print Magazine'
    __author__ = u'Tony Maro'
    description = u'Last print version of the data security magazine'
    INDEX = "http://www.scmagazineus.com/issuearchive/"
    no_stylesheets = True
    language = 'en'
    keep_only_tags = [dict(id=['article', 'review'])]
    remove_tags = [dict(id=['articlePrintTools', 'reviewBodyColumn'])]
    LOG_IN = 'http://www.scmagazineus.com/login/'
    tags = 'News,SC Magazine'
    needs_subscription = True

    def parse_index(self):
        """Return ``[(section_title, [article, ...]), ...]`` for the issue.

        The first ``issueArchiveItem`` block of the archive page supplies
        the issue link and the cover image. Each ``PrintSection`` of the
        issue page becomes one section. Article links are rewritten to
        their print variants. An empty list is returned when the archive
        page has no issue block.
        """
        articles = []

        soup = self.index_to_soup(self.INDEX)
        sectit = soup.find('div', attrs={'class': 'issueArchiveItem'})
        if sectit is None:
            return articles

        issuelink = sectit.find('a')['href']
        self.cover_url = sectit.find('img')['src']
        if issuelink is None:
            return articles

        issue = self.index_to_soup(issuelink)
        if issue is None:
            return articles

        for printsection in issue.findAll('div', attrs={'class': 'PrintSection'}):
            sectiontitle = printsection.find('h3').contents[0]
            onesection = []
            for onearticle in printsection.findAll('div', attrs={'class': 'IssueArchiveFormat'}):
                entry = self._parse_article(onearticle)
                if entry is not None:
                    onesection.append(entry)
            articles.append((sectiontitle, onesection))

        return articles

    def _parse_article(self, onearticle):
        """Turn one IssueArchiveFormat block into an article dict.

        Returns None when the block has no ``<h3><a href=...>`` link, so a
        link-less entry can never reuse the URL or teaser of the article
        processed before it.
        """
        arttitlet = onearticle.find('h3')
        if arttitlet is None:
            return None
        mylink = arttitlet.find('a')
        # has_key is the attribute test on these tag objects.
        if mylink is None or not mylink.has_key('href'):
            return None

        if mylink.has_key('title'):
            arttitle = mylink['title']
        else:
            arttitle = 'unknown'

        # Point at the print versions of articles and reviews.
        artlink = mylink['href']
        artlink = artlink.replace("/article", "/printarticle")
        artlink = artlink.replace("/review", "/printreview")

        # The teaser is the first child of the "deck" div; fall back to an
        # empty description when the div is missing or empty.
        description = ''
        deck = onearticle.find('div', attrs={'class': 'deck'})
        if deck is not None and deck.contents:
            description = deck.contents[0]

        return {'title': arttitle, 'url': artlink, 'description': description, 'date': ''}

    def get_browser(self):
        """Return a browser that has logged in with the user's credentials.

        Raises LoginFailed when the response to the login form does not
        contain a logout link.
        """
        br = BasicNewsRecipe.get_browser(self)
        br.open(self.LOG_IN)
        br.select_form(name='aspnetForm')
        br['ctl00$ctl00$cphAllPageContent$cphMainContent$SubscriberEasyLoginView1$txtEmail'] = self.username
        br['ctl00$ctl00$cphAllPageContent$cphMainContent$SubscriberEasyLoginView1$txtPassword'] = self.password
        raw = br.submit("ctl00$ctl00$cphAllPageContent$cphMainContent$SubscriberEasyLoginView1$btnLogin").read()
        if 'Logout</a>' not in raw:
            raise LoginFailed(
                _('Failed to log in, check your username and password for'
                  ' the calibre Periodicals service.'))
        return br
|
||||
|
55
resources/recipes/siol.recipe
Normal file
@ -0,0 +1,55 @@
|
||||
# coding: utf-8
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, BlonG'
|
||||
'''
|
||||
www.siol.si
|
||||
'''
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
class Siol(BasicNewsRecipe):
    """Recipe for the Siol.net RSS feeds."""

    # --- metadata ---
    title = u'Siol.net'
    __author__ = u'BlonG'
    description = "Multimedijski portal z aktualnimi vsebinami, intervjuji, komentarji iz Slovenije in sveta, sportal, trendi, avtomoto, blogos"
    language = 'sl'
    cover_url = 'https://sites.google.com/site/javno2010/home/siol_cover.jpg'

    # --- download settings ---
    oldest_article = 3
    max_articles_per_feed = 20
    no_stylesheets = True
    use_embedded_content = False

    # --- presentation ---
    extra_css = '''
        h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
        p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
        body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
    '''

    html2lrf_options = ['--base-font-size', '10']

    # --- page clean-up: keep the content container, drop the listed blocks ---
    keep_only_tags = [{'name': 'div', 'attrs': {'id': 'idContent'}}]

    remove_tags = [
        {'name': 'span', 'attrs': {'class': 'com1'}},
        {'name': 'div', 'attrs': {'class': 'relation'}},
        {'name': 'p', 'attrs': {'class': 'path'}},
        {'name': 'div', 'attrs': {'class': 'clear_r'}},
        {'name': 'div', 'attrs': {'id': 'appendix'}},
        {'name': 'div', 'attrs': {'id': 'rail'}},
        {'name': 'div', 'attrs': {'id': 'div_comments'}},
        {'name': 'div', 'attrs': {'class': 'thumbs'}},
    ]

    # --- sources: (feed title, feed URL) ---
    feeds = [
        (u'Slovenija', u'http://www.siol.net/rss.aspx?path=Slovenija'),
        (u'Lokalne novice', u'http://www.siol.net/rss.aspx?path=Slovenija/Lokalne_novice'),
        (u'EU', u'http://www.siol.net/rss.aspx?path=EU'),
        (u'Svet', u'http://www.siol.net/rss.aspx?path=Svet'),
        (u'Gospodarstvo', u'http://www.siol.net/rss.aspx?path=Gospodarstvo'),
        (u'Sportal', u'http://www.siol.net/rss.aspx?path=Sportal'),
        (u'Trendi', u'http://www.siol.net/rss.aspx?path=Trendi'),
        (u'Avtomoto', u'http://www.siol.net/rss.aspx?path=Avtomoto'),
        (u'Tehnologija', u'http://www.siol.net/rss.aspx?path=Tehnologija'),
        (u'TV / Film', u'http://www.siol.net/rss.aspx?path=TV'),
    ]
|
@ -6,6 +6,7 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
|
||||
spiegel.de
|
||||
'''
|
||||
|
||||
from time import strftime
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class Spiegel_ger(BasicNewsRecipe):
|
||||
@ -44,3 +45,6 @@ class Spiegel_ger(BasicNewsRecipe):
|
||||
rmain, rsep, rrest = main.rpartition(',')
|
||||
purl = rmain + ',druck-' + rrest + ',' + rest
|
||||
return purl
|
||||
|
||||
def get_cover_url(self):
    """Return the cover URL built from the current year, week and day of year."""
    # %Y = year, %W = week number, %j = day of the year.
    dated_path = strftime("%Y/%W/%j/titel.jpg")
    return 'http://wissen.spiegel.de/wissen/titel/SP/' + dated_path
|
||||
|
@ -7,7 +7,7 @@ class AdvancedUserRecipe1284927619(BasicNewsRecipe):
|
||||
__author__ = 'noxxx'
|
||||
max_articles_per_feed = 100
|
||||
description = 'tagesanzeiger.ch: Nichts verpassen'
|
||||
category = 'News, Politik, Nachrichten, Schweiz, Zürich'
|
||||
category = 'News, Politik, Nachrichten, Schweiz, Zuerich'
|
||||
language = 'de'
|
||||
|
||||
conversion_options = {
|
||||
|
@ -3,12 +3,12 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'
|
||||
|
||||
''' http://www.derstandard.at - Austrian Newspaper '''
|
||||
|
||||
import re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class TelepolisNews(BasicNewsRecipe):
|
||||
title = u'Telepolis (News)'
|
||||
title = u'Telepolis (News+Artikel)'
|
||||
__author__ = 'Gerhard Aigner'
|
||||
publisher = 'Heise Zeitschriften Verlag GmbH & Co KG'
|
||||
description = 'News from telepolis'
|
||||
@ -20,16 +20,16 @@ class TelepolisNews(BasicNewsRecipe):
|
||||
encoding = "utf-8"
|
||||
language = 'de_AT'
|
||||
|
||||
use_embedded_content = False
|
||||
use_embedded_content =False
|
||||
remove_empty_feeds = True
|
||||
|
||||
preprocess_regexps = [(re.compile(r'<a[^>]*>', re.DOTALL|re.IGNORECASE), lambda match: ''),
|
||||
(re.compile(r'</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),]
|
||||
|
||||
keep_only_tags = [dict(name = 'table',attrs={'class':'blogtable'})]
|
||||
remove_tags = [dict(name='img'), dict(name='td',attrs={'class':'blogbottom'})]
|
||||
keep_only_tags = [dict(name = 'td',attrs={'class':'bloghead'}),dict(name = 'td',attrs={'class':'blogfliess'})]
|
||||
remove_tags = [dict(name='img'), dict(name='td',attrs={'class':'blogbottom'}), dict(name='td',attrs={'class':'forum'})]
|
||||
|
||||
feeds = [(u'News', u'http://www.heise.de/tp/news.rdf')]
|
||||
feeds = [(u'News', u'http://www.heise.de/tp/news-atom.xml')]
|
||||
|
||||
html2lrf_options = [
|
||||
'--comment' , description
|
||||
@ -41,7 +41,7 @@ class TelepolisNews(BasicNewsRecipe):
|
||||
|
||||
def get_article_url(self, article):
|
||||
'''if the linked article is of kind artikel don't take it'''
|
||||
if (article.link.count('artikel') > 0) :
|
||||
if (article.link.count('artikel') > 1) :
|
||||
return None
|
||||
return article.link
|
||||
|
||||
@ -49,3 +49,5 @@ class TelepolisNews(BasicNewsRecipe):
|
||||
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
|
||||
soup.head.insert(0,mtag)
|
||||
return soup
|
||||
|
||||
|
||||
|
@ -6,6 +6,8 @@ __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
|
||||
www.h-online.com
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class TheHeiseOnline(BasicNewsRecipe):
|
||||
title = u'The H'
|
||||
__author__ = 'Hiroshi Miura'
|
||||
|
19
resources/recipes/thn.recipe
Normal file
@ -0,0 +1,19 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1289990851(BasicNewsRecipe):
    """Recipe for The Hockey News all-categories RSS feed."""

    # --- metadata ---
    title = u'The Hockey News'
    __author__ = 'Nexus'
    language = 'en_CA'

    # --- download settings ---
    oldest_article = 7
    max_articles_per_feed = 25
    no_stylesheets = True

    # --- page clean-up: keep the headline and box container, then drop
    # the blocks listed in remove_tags ---
    keep_only_tags = [
        {'name': 'h1', 'attrs': {'class': ['headline']}},
        {'name': 'div', 'attrs': {'class': ['box_container']}},
    ]
    remove_tags = [
        {'name': 'div', 'attrs': {'class': 'article_info'}},
        {'name': 'div', 'attrs': {'class': 'photo_details'}},
        {'name': 'div', 'attrs': {'class': 'tool_menu'}},
        {'name': 'div', 'attrs': {'id': 'comments_container'}},
        {'name': 'div', 'attrs': {'id': 'wrapper'}},
    ]

    # --- source ---
    feeds = [(u'THN', u'http://www.thehockeynews.com/rss/all_categories.xml')]
|
||||
|
34
resources/recipes/tsn.recipe
Normal file
@ -0,0 +1,34 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class AdvancedUserRecipe1289990851(BasicNewsRecipe):
    """Recipe that builds its feeds from the TSN NHL index page."""

    title = u'TSN'
    oldest_article = 7
    max_articles_per_feed = 50
    language = 'en_CA'
    __author__ = 'Nexus'
    no_stylesheets = True
    INDEX = 'http://tsn.ca/nhl/story/?id=nhl'
    keep_only_tags = [
        dict(name='div', attrs={'id': ['tsnColWrap']}),
        dict(name='div', attrs={'id': ['tsnStory']}),
    ]
    remove_tags = [
        dict(name='div', attrs={'id': 'tsnRelated'}),
        dict(name='div', attrs={'class': 'textSize'}),
    ]

    def parse_index(self):
        """Return ``[(feed_title, [article, ...]), ...]`` from the index page.

        Each ``feature`` div that has an ``<h2>`` becomes one feed named
        after that heading, and every link with an href inside it becomes
        an article. Feature blocks without a heading or without usable
        links are left out.
        """
        feeds = []
        soup = self.index_to_soup(self.INDEX)
        for feed_part in soup.findAll('div', attrs={'class': 'feature'}):
            if not feed_part.h2:
                continue
            # tag_to_string collects the text even when the heading wraps
            # other tags, where `.string` would be None.
            feed_title = self.tag_to_string(feed_part.h2)
            articles = []
            # href=True skips anchors that carry no link target; indexing
            # such an anchor with ['href'] would raise KeyError.
            for article_part in feed_part.findAll('a', href=True):
                articles.append({
                    'title': self.tag_to_string(article_part),
                    'url': 'http://tsn.ca/' + article_part['href'],
                    'description': '',
                    'date': '',
                })
            if articles:
                feeds.append((feed_title, articles))
        return feeds
|
||||
|
195
resources/recipes/vedomosti.recipe
Normal file
@ -0,0 +1,195 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
u'''
|
||||
Ведомости
|
||||
'''
|
||||
|
||||
from calibre.web.feeds.feedparser import parse
|
||||
from calibre.ebooks.BeautifulSoup import Tag
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class VedomostiRecipe(BasicNewsRecipe):
    """Recipe for the Vedomosti newspaper RSS feed.

    The single RSS feed is split into virtual feeds using the tags attached
    to each entry, and every article page is rebuilt around its
    ``article_text`` div.
    """

    title = u'Ведомости'
    __author__ = 'Nikolai Kotchetkov'
    publisher = 'vedomosti.ru'
    category = 'press, Russia'
    description = u'Ежедневная деловая газета'
    oldest_article = 3
    max_articles_per_feed = 100

    masthead_url = u'http://motorro.com/imgdir/logos/ved_logo_black2_cropped.gif'
    cover_url = u'http://motorro.com/imgdir/logos/ved_logo_black2_cropped.gif'

    # Add feed names if you want them to be sorted (feeds of this list
    # appear first).
    sortOrder = [u'_default', u'Первая полоса', u'Власть и деньги']

    encoding = 'cp1251'
    language = 'ru'
    no_stylesheets = True
    remove_javascript = True
    recursions = 0

    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    keep_only_tags = [dict(name='td', attrs={'class': ['second_content']})]

    remove_tags_after = [dict(name='div', attrs={'class': 'article_text'})]

    remove_tags = [dict(name='div', attrs={'class': ['sep', 'choice', 'articleRightTbl']})]

    feeds = [u'http://www.vedomosti.ru/newspaper/out/rss.xml']

    # Base URL for relative links.
    base_url = u'http://www.vedomosti.ru'

    extra_css = ('h1 {font-size: 1.5em; margin: 0em 0em 0em 0em; text-align: center;}'
                 'h2 {font-size: 1.0em; margin: 0em 0em 0em 0em;}'
                 'h3 {font-size: 0.8em; margin: 0em 0em 0em 0em;}'
                 '.article_date {font-size: 0.5em; color: gray; font-family: monospace; text-align:right;}'
                 '.article_authors {font-size: 0.5em; color: gray; font-family: monospace; text-align:right;}'
                 '.article_img {width:100%; text-align: center; padding: 3px 3px 3px 3px;}'
                 '.article_img_desc {width:100%; text-align: center; font-size: 0.5em; color: gray; font-family: monospace;}'
                 '.article_desc {font-size: 1em; font-style:italic;}')

    def parse_index(self):
        """Split the RSS feed into virtual feeds keyed by entry tag.

        Returns a list of ``(feed_name, articles)`` tuples. Feeds named in
        ``sortOrder`` come first, followed by the rest. Entries without
        tags, or with an empty tag term, go to the ``_default`` feed, at
        most once per entry.

        Raises NotImplementedError when the feed cannot be loaded or any
        error occurs while processing it; the error itself is logged first.
        """
        try:
            feedData = parse(self.feeds[0])
            if not feedData:
                raise NotImplementedError
            self.log("parse_index: Feed loaded successfully.")
            if feedData.feed.has_key('title'):
                self.title = feedData.feed.title
                self.log("parse_index: Title updated to: ", self.title)
            if feedData.feed.has_key('description'):
                self.description = feedData.feed.description
                self.log("parse_index: Description updated to: ", self.description)

            feeds = {}

            def get_virtual_feed_articles(feed):
                # Return the article list of a virtual feed, creating the
                # feed on first use.
                if feed in feeds:
                    return feeds[feed][1]
                self.log("Adding new feed: ", feed)
                articles = []
                feeds[feed] = (feed, articles)
                return articles

            # Iterate feed items and distribute articles using tags.
            for item in feedData.entries:
                link = item.get('link', '')
                title = item.get('title', '')
                if '' == link or '' == title:
                    continue
                article = {'title': title, 'url': link, 'description': item.get('description', ''), 'date': item.get('date', ''), 'content': ''}
                if not item.has_key('tags'):
                    get_virtual_feed_articles('_default').append(article)
                    continue
                # Tracked per entry so that several empty tag terms add the
                # article to the default feed only once.
                addedToDefault = False
                for tag in item.tags:
                    term = tag.get('term', '')
                    if '' == term:
                        if not addedToDefault:
                            get_virtual_feed_articles('_default').append(article)
                            addedToDefault = True
                        continue
                    get_virtual_feed_articles(term).append(article)

            # Build the feed list: feeds named in sortOrder first, then the
            # remaining ones.
            result = []
            for feedName in self.sortOrder:
                if feedName not in feeds:
                    continue
                result.append(feeds[feedName])
                del feeds[feedName]
            result.extend(feeds.values())

            return result

        except Exception as err:
            self.log(err)
            raise NotImplementedError

    def preprocess_html(self, soup):
        """Pass the document through adeify_images and return the result."""
        return self.adeify_images(soup)

    def postprocess_html(self, soup, first_fetch):
        """Rebuild the page around the ``article_text`` div.

        The title, the lead image with its caption paragraphs, the abstract
        and the authors line are moved into the article div, relative links
        are made absolute, and the ``second_content`` cell is replaced by
        the result. The soup is returned unchanged when the article div is
        missing.
        """
        # Find article
        contents = soup.find('div', {'class': ['article_text']})
        if not contents:
            self.log('postprocess_html: article div not found!')
            return soup
        contents.extract()

        # Find title
        title = soup.find('h1')
        if title:
            contents.insert(0, title)

        # Find article image
        newstop = soup.find('div', {'class': ['newstop']})
        if newstop:
            img = newstop.find('img')
            if img:
                imgDiv = Tag(soup, 'div')
                imgDiv['class'] = 'article_img'

                if img.has_key('width'):
                    del(img['width'])
                if img.has_key('height'):
                    del(img['height'])

                # Caption paragraphs are the siblings following the image's
                # parent; remember the starting point before the image is
                # moved.
                element = img.parent.nextSibling

                img.extract()
                imgDiv.insert(0, img)

                while element:
                    # Advance first: skipping a non-Tag sibling (such as
                    # text) without moving on would loop forever.
                    nextElement = element.nextSibling
                    if isinstance(element, Tag) and 'p' == element.name:
                        element.extract()
                        element['class'] = 'article_img_desc'
                        imgDiv.insert(len(imgDiv.contents), element)
                    element = nextElement

                contents.insert(1, imgDiv)

        # Find article abstract
        abstract = soup.find('p', {'class': ['subhead']})
        if abstract:
            abstract['class'] = 'article_desc'
            contents.insert(2, abstract)

        # Find article authors
        authorsDiv = soup.find('div', {'class': ['autors']})
        if authorsDiv:
            authorsP = authorsDiv.find('p')
            if authorsP:
                authorsP['class'] = 'article_authors'
                contents.insert(len(contents.contents), authorsP)

        # Fix urls that use relative path
        for url in contents.findAll('a'):
            if not url.has_key('href'):
                continue
            # startswith is safe on an empty href, unlike indexing [0].
            if url['href'].startswith('/'):
                url['href'] = self.base_url + url['href']

        body = soup.find('td', {'class': ['second_content']})
        if body:
            body.replaceWith(contents)

        return soup
|
||||
|
@ -31,8 +31,9 @@ class WashingtonPost(BasicNewsRecipe):
|
||||
('Education', 'http://www.washingtonpost.com/wp-dyn/rss/education/index.xml'),
|
||||
('Style',
|
||||
'http://www.washingtonpost.com/wp-dyn/rss/print/style/index.xml'),
|
||||
('Sports',
|
||||
'http://feeds.washingtonpost.com/wp-dyn/rss/linkset/2010/08/19/LI2010081904067_xml'),
|
||||
('NFL Sports',
|
||||
'http://www.washingtonpost.com/wp-dyn/rss/sports/index/nfl/index.xml'),
|
||||
('Redskins', 'http://www.washingtonpost.com/wp-dyn/rss/sports/redskins/index.xml'),
|
||||
('Editorials', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/05/30/LI2005053000331.xml'),
|
||||
]
|
||||
|
||||
|
@ -12,6 +12,7 @@ class ZeitDe(BasicNewsRecipe):
|
||||
title = 'Zeit Online'
|
||||
description = 'Zeit Online'
|
||||
language = 'de'
|
||||
encoding = 'UTF-8'
|
||||
|
||||
__author__ = 'Martin Pitt, Sujata Raman, Ingo Paschke and Marc Toensing'
|
||||
|
||||
@ -43,7 +44,7 @@ class ZeitDe(BasicNewsRecipe):
|
||||
('Sport', 'http://newsfeed.zeit.de/sport/index'),
|
||||
]
|
||||
|
||||
extra_css = '.reaktion,.taglist,.comments,.reponse,.responsetitle,.responsebody,.reponse,.inline,.date{display:none;}li.date{display:block}'
|
||||
extra_css = '.excerpt{font-size:1em}.reaktion,.taglist,.comments,.reponse,.responsetitle,.responsebody,.reponse,.inline,.date{display:none;}li.date{display:block}'
|
||||
|
||||
#filter_regexps = [r'ad.de.doubleclick.net/']
|
||||
|
||||
@ -55,6 +56,16 @@ class ZeitDe(BasicNewsRecipe):
|
||||
ans = None
|
||||
return ans
|
||||
|
||||
def preprocess_html(self, soup):
    """Turn ul/li tags into divs, set the language attributes and add a charset meta string.

    The soup is modified in place and returned.
    """
    # Rename list tags to plain divs.
    list_tags = soup.findAll(name=['ul', 'li'])
    for list_tag in list_tags:
        list_tag.name = 'div'

    # Set both language attributes on <html> from self.lang.
    for attribute in ('xml:lang', 'lang'):
        soup.html[attribute] = self.lang

    # Put the Content-Type declaration first in <head>.
    charset_meta = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
    soup.head.insert(0, charset_meta)
    return soup
|
||||
|
||||
def get_cover_url(self):
|
||||
try:
|
||||
inhalt = self.index_to_soup('http://www.zeit.de/inhalt')
|
||||
|
63
resources/recipes/zeitde_sub.recipe
Normal file
@ -0,0 +1,63 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 mode: python -*-
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Steffen Siebert <calibre at steffensiebert.de>'
|
||||
__docformat__ = 'restructuredtext de'
|
||||
__version__ = '1.1'
|
||||
|
||||
"""
|
||||
Die Zeit EPUB
|
||||
"""
|
||||
|
||||
import os, urllib2, zipfile, re
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
|
||||
class ZeitEPUBAbo(BasicNewsRecipe):
    # Recipe for the subscriber-only EPUB edition of Die Zeit. Instead of
    # scraping articles it downloads the publisher's ready-made EPUB and
    # unpacks it into the recipe output directory.

    title = u'Zeit Online Premium'
    description = u'Das EPUB Abo der Zeit (needs subscription)'
    language = 'de'
    lang = 'de-DE'

    __author__ = 'Steffen Siebert'
    needs_subscription = True

    conversion_options = {
        'no_default_epub_cover' : True
    }

    def build_index(self):
        """Log in, download the current issue as EPUB and unpack it.

        Returns the path of ``content.opf`` inside ``self.output_dir``.

        Raises ValueError when opening the login URL fails with an
        HTTP error.
        """
        domain = "http://premium.zeit.de"
        url = domain + "/abovorteile/cgi-bin/_er_member/p4z.fpl?ER_Do=getUserData&ER_NextTemplate=login_ok"

        browser = self.get_browser()
        browser.add_password(domain, self.username, self.password)

        try:
            browser.open(url)
        except urllib2.HTTPError:
            self.report_progress(0,_("Can't login to download issue"))
            raise ValueError('Failed to login, check your username and password')

        # First hop only navigates to the e-paper page; the second link is
        # the actual EPUB download.
        browser.follow_link(text="DIE ZEIT als E-Paper")
        response = browser.follow_link(url_regex=re.compile('^http://contentserver.hgv-online.de/nodrm/fulfillment\\?distributor=zeit-online&orderid=zeit_online.*'))

        # The file is reopened by name below, so it has to outlive close().
        tmp = PersistentTemporaryFile(suffix='.epub')
        try:
            self.report_progress(0,_('downloading epub'))
            tmp.write(response.read())
        finally:
            # Close exactly once, even when the download fails half-way.
            tmp.close()

        zfile = zipfile.ZipFile(tmp.name, 'r')
        try:
            self.report_progress(0,_('extracting epub'))
            zfile.extractall(self.output_dir)
        finally:
            # Release the archive handle; the original never closed it.
            zfile.close()

        index = os.path.join(self.output_dir, 'content.opf')

        self.report_progress(1,_('epub downloaded and extracted'))

        return index
|
||||
|
@ -363,13 +363,16 @@
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="rtf:row">
|
||||
<xsl:element name="row">
|
||||
<xsl:element name="tr">
|
||||
<xsl:apply-templates/>
|
||||
</xsl:element>
|
||||
</xsl:template>
|
||||
|
||||
<xsl:template match="rtf:cell">
|
||||
<xsl:element name="cell">
|
||||
<xsl:element name="td">
|
||||
<xsl:if test="@class">
|
||||
<xsl:attribute name="class"><xsl:value-of select="@class"/></xsl:attribute>
|
||||
</xsl:if>
|
||||
<xsl:apply-templates/>
|
||||
</xsl:element>
|
||||
</xsl:template>
|
||||
|
24
resources/viewer/hyphenate/patterns/be.js
Normal file
@ -1,13 +1,13 @@
|
||||
// For questions about the Bengali hyphenation patterns
|
||||
// For questions about the Bengali hyphenation patterns
|
||||
// ask Santhosh Thottingal (santhosh dot thottingal at gmail dot com)
|
||||
Hyphenator.languages.bn = {
|
||||
Hyphenator.languages['bn'] = {
|
||||
leftmin : 2,
|
||||
rightmin : 2,
|
||||
shortestPattern : 1,
|
||||
longestPattern : 1,
|
||||
specialChars : unescape('আঅইঈউঊঋএঐঔকগখঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহিীাুূৃোোৈৌৗ্ঃং%u200D'),
|
||||
specialChars : unescape("আঅইঈউঊঋএঐঔকগখঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহিীাুূৃোোৈৌৗ্ঃং%u200D"),
|
||||
patterns : {
|
||||
2 : 'অ1আ1ই1ঈ1উ1ঊ1ঋ1এ1ঐ1ঔ1ি1া1ী1ু1ৃ1ে1ো1ৌ1ৗ1্2ঃ1ং11ক1গ1খ1ঘ1ঙ1চ1ছ1জ1ঝ1ঞ1ট1ঠ1ড1ঢ1ণ1ত1থ1দ1ধ1ন1প1ফ1ব1ভ1ম1য1র1ল1শ1ষ1স1হ',
|
||||
3 : '2ঃ12ং1'
|
||||
2 : "অ1আ1ই1ঈ1উ1ঊ1ঋ1এ1ঐ1ঔ1ি1া1ী1ু1ৃ1ে1ো1ৌ1ৗ1্2ঃ1ং11ক1গ1খ1ঘ1ঙ1চ1ছ1জ1ঝ1ঞ1ট1ঠ1ড1ঢ1ণ1ত1থ1দ1ধ1ন1প1ফ1ব1ভ1ম1য1র1ল1শ1ষ1স1হ",
|
||||
3 : "2ঃ12ং1"
|
||||
}
|
||||
};
|
||||
|
@ -1,11 +1,11 @@
|
||||
// For questions about the Czech hyphenation patterns
|
||||
// For questions about the Czech hyphenation patterns
|
||||
// ask Martin Hasoň (martin dot hason at gmail dot com)
|
||||
Hyphenator.languages.cs = {
|
||||
Hyphenator.languages['cs'] = {
|
||||
leftmin : 2,
|
||||
rightmin : 2,
|
||||
shortestPattern : 1,
|
||||
longestPattern : 6,
|
||||
specialChars : 'ěščřžýáíéúůťď',
|
||||
specialChars : "ěščřžýáíéúůťď",
|
||||
patterns : {
|
||||
2 : "a11f1g1k1n1pu11vy11zé11ňó11š1ť1ú1ž",
|
||||
3 : "_a2_b2_c2_d2_e2_g2_h2_i2_j2_k2_l2_m2_o2_p2_r2_s2_t2_u2_v2_z2_č2_é2_í2_ó2_š2_ú2_ž22a_a2da2ga2ia2ka2ra2sa2ta2u2av2aya2ča2ňa2ť2b_b1db1h1bib1j2bkb1m2bn1bob2z1bá1bí2bň2c_1ca2cc1ce1ci2cl2cn1coc2p2ctcy21cá1cí2cň1ců2d_1dad1bd1d1de1did1j2dkd1m2dn1dod1t1dud2v1dy1dá1dé1dě1dí2dň1dů1dý2e_e1ae1be1ee1ie2ke1o2ere1se1te1ue1áe2ňe1ře2šeú12f_f2l2fn2fr2fs2ft2féf2ú2g_2gngo12h_h2bh2c2hd2hkh2mh2rh1č2hňhř2h2ž2i_i1ai1bi1di1hi1ji1li1mi2ni1oi1ri1ti1xi1ái2ďi1éi1ói1ři2ši2ž2j_j2d1jij1j2jkj2m2jn2jp2jz2jď1jí2jž2k_k2dk2e2kf2kkk2l2kn2ks2kčk2ň2l_2lf2lg2lh1li2lj2lk2ll2ln2lp2lv2lz2lň1lů1lý2m_1ma1me2mf1mim2l2mn1mo2mp1mu2mv2mz2mčm2ž2n_2nb2nf2ngn1j2nk2nn2nz2nď2nónů22nž2o_o1ao1cog2o1ho1io1jo1lo1mo2no1oo1to2uo1xo2zo1čo2ňo1ř2p_2pkp2l2pn2pp2ptpá12pč2pš2pťqu22r_r1br1cr1d2rkr1l2rn2rrr1x2rzr1č2ró2rš2s_s2cs2d1se2sf1sis2js2k2sn1sos2p1sr2ss1sus2v1sé1sí2sň2sť1sůs2ž2t_1te2tf2tg1ti2tl2tm2tn1to2tpt2vt2č1té1tě2tř2tš1tů2u_u2b2ufu2ku2mu2nu2pu2ru2su2vu2zu2ču2ďu2ňu2šu2ž2v_2vkv2l2vm2vnv2p2vňwe22x_2xf2xnx1ty2ay2ey2sy2ňy2šyž22z_2zbz2ez2j2zl2ztz2v2zzzá12zč2zňz2řá1bá1dá1já1sá2ňá1řá2š2č_1ča2čb1če1či2čk2čn1čoč2p2čs1ču1čá1čí1čů2ď_1ďa1ďoé2dé2fé2lé2mé2sé2té2šé2žě1cě1lě2vě2zě1řě2šě2ťě2ží1bí1hí1jí1lí1rí1tí2ňí1ří2š2ň_2ňa2ňk2ňmň1só2z2ř_2řc2řdři12řk2řn1řoř2v2řz2řš2š_2šl2šnš2p2štš2vš2ň2ť_2ťk2ťm2ťtú2dú2kú2lú2nú2pú2tú2vú2zú2čú2žů1bů1cůt2ů2vů2zů2žý1bý1dý1hý1jý1lý2ný1rý1tý1uý1ř2ž_2žk2žl2žnž2v2žď2žň2žš",
|
||||
|
@ -1,10 +1,10 @@
|
||||
Hyphenator.languages.da = {
|
||||
'leftmin' : 2,
|
||||
'rightmin' : 2,
|
||||
'shortestPattern' : 2,
|
||||
'longestPattern' : 8,
|
||||
'specialChars' : 'æøå',
|
||||
'patterns' : {
|
||||
Hyphenator.languages['da'] = {
|
||||
leftmin : 2,
|
||||
rightmin : 2,
|
||||
shortestPattern : 2,
|
||||
longestPattern : 8,
|
||||
specialChars : "æøå",
|
||||
patterns : {
|
||||
3 : "a3ca1ea3ha3ja5oa5z1ba4bd1be1bib1j1bo4bsb5t3bub5w1by1ce3chck35cy3dad1b1ded1fd1gd3h1did3jd1kd1ld1m3dod1p1dud1v3dye3ee1he5x1faf1bf1d1fef1ff1gf1h1fif1k3fl1fof1p4ft1fuf1v3fy1gag1bg1d1geg3fg1gg1h1gi5gjg3kg1lg1m3gog3p1grg3v1gyi1ai3bi1ci3hi5ii5ji1uj5kj3rk5bk3hk1kk1tl1bl1fl3hl3jl1ll3r4ls1mam1bm3d1mem3fm1gm3h1mim3km1lm1mm1n3mom1r3my3nan1bn1c4nd1nen1f1nin1mn1n1non5pn3r4ns3nyn3zo3ao1co1eo5ho1jo3t3pap3dp3fp3mp3np1t1pup5vqu4r1br1fr1hr1lr1nr3pr1rs1d1ses1fs1msp44tbt1ht1mt1n4tsu1au1eu3iu5qv5hv5jv5kvl41vov5pv5t3vuy3ay3ey5o5bæ3dæ3døe3æe5å3fæ3fø3gæ3gåi3ø3kø3kå1mæ3mø3må3næ5nøo5åpå31sæ1sø5våæ3cæ3eæ5iæ5oø3eå1då1eå5hå3lå3t",
|
||||
4 : "_ae3_om1_po15adg5afgaf3r5afsa4gia4gya5kaa3kea5kraku5a3laa1lea1lial3ka1loa3lua1lya3nu3anva5pea3pia5poa1ra1arba1re5arga1ria3roa3saa3sca1sia3ska3soa1tea1tia1toa5tra1tua5vaa1vebe1k4b1n1br4bs5kb3sob1stby5s4c1c4ch_ci4oda4sd1d4de5ddi1edi5l4d1n4dopd5ovd5rud4smd4sud3tad1tedt5od5trdt5udub5e5ade3afe5age3ake1ale3ane5ape3ate3blebs3e1cie4do3effe3fr3efte3gue3inei5se3jee1kae3kee3kle5kre3kue1kve5kye3lee1lie3loe5lue3lyem1s4enne4noe5nue5ole3ope1ore3ovepi3e1pre3rae1ree1rier1ker3se5rye1tae1tee1tie3tje1toe3tre3tue1tye3ume3un3eure1vae3vee1vifej4f1s4f3taf1tef1tif5toge3sgi4bg5ovgs1ag4segs1pgs1vg3tag1teg1tig5tog3trgt4sg3udgun5g5yd4ha_he5s4hethi4ehi3s4h3thun4hvo4i3dri1eli1eni3erif3ri3gui1kai1keik1li5koi3kuik3vi3liil3ki1loil5ui3mu5infin3si3nui3odi3ogi5oki3olion4i3oti5pii5pri3rei3riir5ti3sci3sii4smis3pi1tai1tei1tii3toi3tri1tui3tyi1vai1vei1vij3agjds1j3lej3lijre5ju3s5kapk5au5kavki3ek1le3kluk4ny5kod1konko3v1kra5kryk1siks3kks1pks5vkt5s3kur1kus3kutk4vok4vu5lab5lam1latl3dr1le_5led3len1ler1les4leuli5ol1kel1kol3kyl5mul3op3lov4l3pl4psl5sjl1tal1tel3tilt3ol3trl3tulu5ll3vel3vimi3kmi4o4mopm1pem3pim3plm1pom3prm5skms3pms5vm3tam3tem3tim3trm1ud1mul4nak1naln3drne5aneo4n4go4n1h4nimni5on1ken1kon3krn3kun5kv4n1ln3sin1tan1ten1tin3ton1trn3tun3ty4n1vo4asod5sof5ro5ino3kao1keo3kuo3lao3leo1lio1loo3luo5ly1omron3kook5o3oro5ovo3piop3lop3rop3s4or_o3rior3kor5oo3sio3soo1teo5unov4s4pec3pen1perpe5spe3u4p5h1pla5pok3potp4rop3skp5sops4pp3stpu5b5py34rafr3dr1relr1guri1er3kar1ker1kir3kurmo4r5muro1bro3pr3orr1sar1sirs4nr3spr5sur3svr1ter1tir3tort3sr5tyr3ud5rutr3var1ver3viry4ss3af1sams3aps1ar1sat4s1bsdy4s4ed4s3h1sig5sis5sit5sius5ju4sk_1skes3kl5skys1les1lislo35slus5lys4myso5k5sol3sons1pls5r4s1s44st_5stj3sto1strs1ud3suls3un3surs3ve3s4y5ta_1tag3tegteo14t1f6t3g3tid4t3k4t1l4t3pt4ra1tryt3sit3st4t1t5turt5ve1typ5udlud5rud3s3udvugs3u5guu5klu1lau1leu5lyu5peup5lu3rau3reu3rous5au3siu5sous5vu1teu1tiu1tout5r5u5vva5d1vedve3s5vet1visv3lev5livls1v5rev3stv5suy5dry3key5kiy3koy3kvy5liy5loy5muyns5y1pey3piy3rey3riy3siy3tiy5vezi5o_så3a3tøa5væe3læe3løe3røe5t
æe5tøe1vægiø4g4søg5så3gø1i5tæl3væ5løsm5tån3kæn5tæo5læor3ø5præ5pædr5kær5tær5tør3vær5æl4røn5rør3rådr5års4kå3slås4næ5stø1stås5økti4øt4søt5såt3væu3læy5vææb3læg5aægs5æ5kvæ1reæ3riær5sæ5siæ3soæ3veøde5ø1jeø3keø3leøms5ø1reø3riør5oø1veå3reå5sk",
|
||||
5 : "_an3k_an1s_be1t_her3_ove4_til3_yd5rab5le3abstaf4ria4gefag5inag5si3agtiais5t4alkval5siam4paar5af3a3spa3stea3stia1ta1ato5vba4tibe3robe5rube1s4be1trbi5skbo4grbo3rabo5rece5ro4d3afde5sk3drif3drivd5rosds5ands5ind1skidsu5lds5viea4laed5aredde4ed5raed3re4e1koek5sa3ekspe3ladel3akel3are1lase4lek3elem5elimel5sae4maden5ake4nanen3soer3afe4rage4rake4ref5erhve4ribero5der5over5tre3rumer5unfa4cefags3fejl1fo4rif5tvig3artgi3st4g5omgsha4g5slags3org4strheds3hi4n5ho5koho5vehund3i4bleids5ki3et_ik3reik5riiks5tik4tui3lagil3egil5ejil5elind3tings1in4svions1i5o5ri3plii3stii5suakel5ske3skke5stki3stk5lakko3ra3kortks3ank3stek5stuk4tarkti4ekt5relad3r5lagdld3st4lelele4molfin4l1go1li4galo4du4l5orlses1ls5inl4taf4m5ejm5ingmmen5mo4da4m5ovmse5sms5inm3stemu1lind5sind5sknd5spne4dan3erkn5erlne5slne5stni3stn3ordn1skuns3pon1stan5stint4suob3lio4dinod5riod5uno4geko4gelo4g5oog5reog5sk3optaor1an3ordnord5so3re_o3rego3reko3rero3retor5imor3slor3stpa5ghp5anlpe1rap4lan4ple_4pler4ples4p5p41procp5ulera5is4rarbrd4s34reksre5la5rese4ressre3st5rettri5la4rimor4ing4rinp4rintrk3sorre5sr5skrr5stur5talrt3rer5trir5trosa4ma5s4erse4se4s1g4si4bls5int1skabsk5s44snins4nit5som_3somms5oms5somt4s1op3spec4sper3s4pi1stanst5as3stat1stav1ste_1sted3stel1sten5step3stes5stetst5om1sy1s4tanvteds55tekn5termte5roti4enti3stto5rato1reto1ritor4m4trestro5vts4pats5prts5ult5udsue4t5uk4tauk4tru1reru5skaut5s43varm4v5omyk3liyk4s5yr3eky5t3r_ær5i_øv3rbrød35drøvdstå4er5øn4n5æb4s5ænså4r53værd1værkæ4gekæ4g5rælle4æn1drær4maær4moæ3steøn3støn4t3ørne3års5t",
|
||||
|
20
resources/viewer/hyphenate/patterns/el-monoton.js
Normal file
@ -0,0 +1,20 @@
|
||||
// Hyphenation patterns for Modern Monotonic Greek.
|
||||
// Created by Dimitrios Filippou with some ideas borrowed from
|
||||
// Yannis Haralambous, Kostis Dryllerakis and Claudio Beccari.
|
||||
// From http://tug.org/svn/texhyphen/branches/ptex/hyph-utf8/tex/generic/hyph-utf8/patterns/tex/hyph-el-monoton.tex
|
||||
// Converted by Pablo Rodríguez (hyphenator at pragmata dot tk)
|
||||
// Monotonic Greek hyphenation data, registered under both 'el-monoton' and 'el'.
Hyphenator.languages['el-monoton'] = Hyphenator.languages['el'] = {
    leftmin : 2,
    rightmin : 2,
    shortestPattern : 1,
    longestPattern : 7,
    specialChars : "αεηιουωϊϋάέήίόύώΐΰίάύέήόώβγκδζθλμπντξρσϲςφχψ'ʼ᾿’᾽",
    // Each key N holds a concatenation of patterns that are exactly N
    // characters long (digits included), so a string's length must be a
    // multiple of its key.
    patterns : {
        2 : "α1ε1η1ι1ο1υ1ω1ϊ1ϋ1ά1έ1ή1ί1ό1ύ1ώ1ΐ1ΰ14'4ʼ4᾿",
        3 : "α2ια2ία2ίά2ιά2ιά2ϊά2ϊα2υα2ύα2ύά3υά3υε2ιε2ίε2ίέ2ιέ2ιέ2ϊέ2ϊε2υε2ύε2ύέ3υέ3υη2υη2ύη2ύή3υή3υο2ιο2ίο2ίό2ιό2ιό2ϊό2ϊο2υο2ύο2ύό3υό3υυ2ιυ2ίυ2ίύ3ιύ3ια2ηα2ϊα2ϋε2ϊε2ϋο2ηο2ϊι2αι2άι2άι2ει2έι2έι2οι2όι2όι2ωι2ώι2ώ_ι3_ί3_ί3η2αη2άη2άη2εη2έη2έη2οη2όη2όη2ωη2ώη2ώ_η3_ή3_ή3υ2αυ2άυ2άυ2ου2όυ2όυ2ωυ2ώυ2ώ_υ3_ύ3_ύ34β_4γ_4δ_4ζ_4θ_4κ_4λ_4μ_4ν_4ξ_4π_4ρ_4σ_4ϲ_4ς_4τ_4φ_4χ_4ψ_4β'4βʼ4β᾿4γ'4γʼ4γ᾿4δ'4δʼ4δ᾿4ζ'4ζʼ4ζ᾿4θ'4θʼ4θ᾿4κ'4κʼ4κ᾿4λ'4λʼ4λ᾿4μ'4μʼ4μ᾿4ν'4νʼ4ν᾿4ξ'4ξʼ4ξ᾿4π'4πʼ4π᾿4ρ'4ρʼ4ρ᾿4σ'4σʼ4σ᾿4ϲ'4ϲʼ4ϲ᾿4τ'4τʼ4τ᾿4φ'4φʼ4φ᾿4χ'4χʼ4χ᾿4ψ'4ψʼ4ψ᾿_β4_γ4_δ4_ζ4_θ4_κ4_λ4_μ4_ν4_ξ4_π4_ρ4_σ4_ϲ4_τ4_φ4_χ4_ψ4",
        // "4χτ_" was previously fused with "4γκ1μπ" under a bogus key 10;
        // it is a 4-character pattern and belongs here.
        4 : "ά3η_ά3η_ά3ι_ά3ι_ο2ειό3η_ό3η_ό3ι_ό3ι_4γκ_4μπ_4ντ_4τζ_4τσ_4τϲ_4τς_4μπ'4μπʼ4μπ᾿4ντ'4ντ’4ντ᾿4τζ'4τζʼ4τζ᾿4τσ'4τσʼ4τσ᾽4τϲ'4τϲʼ4τϲ᾿4β1β4γ1γ4δ1δ4ζ1ζ4θ1θ4κ1κ4λ1λ4μ1μ4ν1ν4π1π4ρ1ρ4σ1σ4ϲ1ϲ4τ1τ4φ1φ4χ1χ4ψ1ψ4β1ζ4β1θ4β1κ4β1μ4β1ν4β1ξ4β1π4β1σ4β1ϲ4β1τ4β1φ4β1χ4β1ψ4γ1β4γ1ζ4γ1θ4γ1μ4γ1ξ4γ1π4γ1σ4γ1ϲ4γ1τ4γ1φ4γ1χ4γ1ψ4δ1β4δ1γ4δ1ζ4δ1θ4δ1κ4δ1λ4δ1ξ4δ1π4δ1σ4δ1ϲ4δ1τ4δ1φ4δ1χ4δ1ψ4ζ1β4ζ1γ4ζ1δ4ζ1θ4ζ1κ4ζ1λ4ζ1μτζ2μ4ζ1ν4ζ1ξ4ζ1π4ζ1ρ4ζ1σ4ζ1ϲ4ζ1τ4ζ1φ4ζ1χ4ζ1ψ4θ1β4θ1γ4θ1δ4θ1ζ4θ1κ4θ1μσθ2μϲθ2μ4θ1ξ4θ1π4θ1σ4θ1ϲ4θ1τ4θ1φ4θ1χ4θ1ψ4κ1β4κ1γ4κ1δ4κ1ζ4κ1θ4κ1μ4κ1ξ4κ1π4κ1σ4κ1ϲ4κ1φ4κ1χ4κ1ψ4λ1β4λ1γ4λ1δ4λ1ζ4λ1θ4λ1κ4λ1μ4λ1ν4λ1ξ4λ1π4λ1ρ4λ1σ4λ1ϲ4λ1τ4λ1φ4λ1χ4λ1ψ4μ1β4μ1γ4μ1δ4μ1ζ4μ1θ4μ1κ4μ1λ4μ1ξ4μ1ρ4μ1σ4μ1ϲ4μ1τ4μ1φ4μ1χ4μ1ψ4ν1β4ν1γ4ν1δ4ν1ζ4ν1θ4ν1κ4ν1λ4ν1μ4ν1ξ4ν1π4ν1ρ4ν1σ4ν1ϲ4ν1φ4ν1χ4ν1ψ4ξ1β4ξ1γ4ξ1δ4ξ1ζ4ξ1θ4ξ1κ4ξ1λ4ξ1μ4ξ1ν4ξ1π4ξ1ρ4ξ1σ4ξ1ϲ4ξ1τ4ξ1φ4ξ1χ4ξ1ψ4π1β4π1γ4π1δ4π1ζ4π1θ4π1κ4π1μ4π1ξ4π1σ4π1ϲ4π1φ4π1χ4π1ψ4ρ1β4ρ1γ4ρ1δ4ρ1ζ4ρ1θ4ρ1κ4ρ1λ4ρ1μ4ρ1ν4ρ1ξ4ρ1π4ρ1σ4ρ1ϲ4ρ1τ4ρ1φ4ρ1χ4ρ1ψ4σ1δ4ϲ1δ4σ1ζ4ϲ1ζ4σ1ν4ϲ1ν4σ1ξ4ϲ1ξ4σ1ρ4ϲ1ρ4σ1ψ4ϲ1ψ4τ1β4τ1γ4τ1δ4τ1θ4τ1κ4τ1ν4τ1ξ4τ1π4τ1φστ2φϲτ2φ4τ1χ4τ1ψ4φ1β4φ1γ4φ1δ4φ1ζ4φ1κ4φ1μ4φ1ν4φ1ξ4φ1π4φ1σ4φ1ϲ4φ1χ4φ1ψ4χ1β4χ1γ4χ1δ4χ1ζ4χ1κ4χ1μ4χ1ξ4χ1π4χ1σ4χ1ϲ4χ1φ4χ1ψ4ψ1β4ψ1γ4ψ1δ4ψ1ζ4ψ1θ4ψ1κ4ψ1λ4ψ1μ4ψ1ν4ψ1ξ4ψ1π4ψ1ρ4ψ1σ4ψ1ϲ4ψ1τ4ψ1φ4ψ1χ4βρ_4γλ_4κλ_4κτ_6κς_6κϲ_4κσ_4λς_4λϲ_4λσ_4μς_4μϲ_4μσ_4νς_4νϲ_4νσ_4ρς_4ρϲ_4ρσ_4σκ_4ϲκ_4στ_4ϲτ_4τλ_4τρ_4φτ_4χτ_",
        5 : "ο3ϊ3όο3ϊ3ό4γ1κτ4μ1πτ4ν1τζ4ν1τσ4ν1τϲ4γκς_4γκϲ_4γκσ_4μπλ_4μπν_4μπρ_4ντς_4ντϲ_4ντσ_",
        // "4γκ1μπ" (6 characters) moved here from the bogus key 10.
        6 : "4ρ5γ2μ4ρ5θ2μ4λ5κ2μ4ρ5κ2μ4ν5κ2φ4γ5ξ2τ4ρ5ξ2τ4ρ5φ2ν4ρ5χ2μ4μ5ψ2τ4γ5κ2φ4γκ1ντ4γκ1τζ4γκ1τσ4γκ1τϲ4μπ1ντ4μπ1τζ4μπ1τσ4μπ1τϲ4ντ1μπ4τσ1γκ4τϲ1γκ4τσ1μπ4τϲ1μπ4τσ1ντ4τϲ1ντ4γκ1μπ"
    }
};
|
26
resources/viewer/hyphenate/patterns/el-polyton.js
Normal file
@ -0,0 +1,26 @@
|
||||
// Hyphenation patterns for Modern Polytonic Greek.
|
||||
// Created by Dimitrios Filippou with some ideas borrowed from
|
||||
// Yannis Haralambous, Kostis Dryllerakis and Claudio Beccari.
|
||||
// From http://tug.org/svn/texhyphen/branches/ptex/hyph-utf8/tex/generic/hyph-utf8/patterns/tex/hyph-el-polyton.tex
|
||||
// Converted by Pablo Rodríguez (hyphenator at pragmata dot tk)
|
||||
Hyphenator.languages['el-polyton'] = {
|
||||
leftmin : 2,
|
||||
rightmin : 2,
|
||||
shortestPattern : 1,
|
||||
longestPattern : 11,
|
||||
specialChars : "αεηιουωϊϋἀἁἂἃἄἅἆἇἐἑἒἓἔἕἠἡἢἣἤἥἦἧἰἱἲἳἴἵἶἷὀὁὂὃὄὅὐὑὒὓὔὕὖὗὠὡὢὣὤὥὦὧὰὲὴὶὸὺὼᾀᾁᾂᾃᾄᾅᾆᾇᾐᾑᾒᾓᾔᾕᾖᾗᾠᾡᾢᾣᾤᾥᾦᾧᾲᾳᾴᾶᾷῂῃῄῆῇῒῖῗῢῦῧῲῳῴῶῷάέήίόύώΐΰάέήίόύώΐΰβγκδζθλμπντξρσϲςφχψ'ʼ᾿’᾽ῤῥ",
|
||||
patterns : {
|
||||
2 : "α1ε1η1ι1ο1υ1ω1ϊ1ϋ1ἀ1ἁ1ἂ1ἃ1ἄ1ἅ1ἆ1ἇ1ἐ1ἑ1ἒ1ἓ1ἔ1ἕ1ἠ1ἡ1ἢ1ἣ1ἤ1ἥ1ἦ1ἧ1ἰ1ἱ1ἲ1ἳ1ἴ1ἵ1ἶ1ἷ1ὀ1ὁ1ὂ1ὃ1ὄ1ὅ1ὐ1ὑ1ὒ1ὓ1ὔ1ὕ1ὖ1ὗ1ὠ1ὡ1ὢ1ὣ1ὤ1ὥ1ὦ1ὧ1ὰ1ὲ1ὴ1ὶ1ὸ1ὺ1ὼ1ᾀ1ᾁ1ᾂ1ᾃ1ᾄ1ᾅ1ᾆ1ᾇ1ᾐ1ᾑ1ᾒ1ᾓ1ᾔ1ᾕ1ᾖ1ᾗ1ᾠ1ᾡ1ᾢ1ᾣ1ᾤ1ᾥ1ᾦ1ᾧ1ᾲ1ᾳ1ᾴ1ᾶ1ᾷ1ῂ1ῃ1ῄ1ῆ1ῇ1ῒ1ῖ1ῗ1ῢ1ῦ1ῧ1ῲ1ῳ1ῴ1ῶ1ῷ1ά1έ1ή1ί1ό1ύ1ώ1ΐ1ΰ1ά1έ1ή1ί1ό1ύ1ώ1ΐ1ΰ16'6ʼ6᾿",
|
||||
3 : "α2ια2ία2ία2ὶα2ῖα2ἰα2ἴα2ἲα2ἶα2ἱα2ἵα2ἳα2ἷά2ιά2ιά2ϊά2ϊα2υα2ύα2ύα2ὺα2ῦα2ὐα2ὔα2ὒα2ὖα2ὑα2ὕα2ὓα2ὗά3υά3υε2ιε2ίε2ίε2ὶε2ῖε2ἰε2ἴε2ἲε2ἶε2ἱε2ἵε2ἳε2ἷέ2ιέ2ιέ2ϊέ2ϊε2υε2ύε2ύε2ὺε2ῦε2ὐε2ὔε2ὒε2ὖε2ὑε2ὕε2ὓε2ὗέ3υέ3υη2υη2ύη2ύη2ὺη2ῦη2ὐη2ὔη2ὒη2ὖη2ὑη2ὕη2ὓη2ὗο2ιο2ίο2ίο2ὶο2ῖο2ἰο2ἴο2ἲο2ἶο2ἱο2ἵο2ἳο2ἷό2ιό2ιό2ϊό2ϊο2υο2ύο2ύο2ὺο2ῦο2ὐο2ὔο2ὒο2ὖο2ὑο2ὕο2ὓο2ὗό3υό3υυ2ιυ2ίυ2ίυ2ὶυ2ῖυ2ἰυ2ἴυ2ἲυ2ἶυ2ἱυ2ἵυ2ἳυ2ἷα2ηα2ϊα2ϋά3ϋά3ϋε2ηέ2ηέ2ηε2ϊε2ϋό2ηό2ηο2ϊω2ιὠ2ιι2αι2άι2άι2ὰι2ᾶι2ει2έι2έι2ὲι2οι2όι2όι2ὸι2ωι2ώι2ώι2ὼι2ῶ_ί3_ί3_ῖ3_ἰ3_ἱ3η2αῃ2αη2άη2άη2ὰη2ᾶῃ2άῃ2άῃ2ὰῃ2ᾶη2εῃ2εη2έη2έη2ὲῃ2έῃ2έῃ2ὲη2οῃ2οη2όη2όη2ὸῃ2όῃ2όῃ2ὸη2ωῃ2ωη2ώη2ώη2ὼη2ῶῃ2ώῃ2ώῃ2ὼῃ2ῶ_ή3_ή3_ῆ3_ἠ3_ἡ3υ2αυ2άυ2άυ2ὰυ2ᾶυ2ευ2έυ2έυ2ὲυ2ου2όυ2όυ2ὸυ2ωυ2ώυ2ώυ2ὼυ2ῶ_ύ3_ύ3_ῦ3_ὑ36β_6γ_6δ_6ζ_6θ_6κ_6λ_6μ_6ν_6ξ_6π_6ρ_6σ_6ϲ_6ς_6τ_6φ_6χ_6ψ_6β'6βʼ6β᾿6γ'6γʼ6γ᾿6δ'6δʼ6δ᾿6ζ'6ζʼ6ζ᾿6θ'6θʼ6θ᾿6κ'6κʼ6κ᾿6λ'6λʼ6λ᾿6μ'6μʼ6μ᾿6ν'6νʼ6ν᾿6ξ'6ξʼ6ξ᾿6π'6πʼ6π᾿6ρ'6ρʼ6ρ᾿6σ'6σʼ6σ᾿6ϲ'6ϲʼ6ϲ᾿6τ'6τʼ6τ᾿6φ'6φʼ6φ᾿6χ'6χʼ6χ᾿6ψ'6ψʼ6ψ᾿_β6_γ6_δ6_ζ6_θ6_κ6_λ6_μ6_ν6_ξ6_π6_ρ6_σ6_ϲ6_τ6_φ6_χ6_ψ6",
|
||||
4 : "ά3η_ά3η_ά3ι_ά3ι_ά3ϊ_ά3ϊ_ό2ειό2ειό3η_ό3η_ό3ι_ό3ι_ό3ϊ_ό3ϊ_6γκ_6μπ_6ντ_6τζ_6τσ_6τϲ_6τς_6μπ'6μπʼ6μπ᾿6ντ'6ντ’6ντ᾿6τζ'6τζʼ6τζ᾿6τσ'6τσʼ6τσ᾽6τϲ'6τϲʼ6τϲ᾿4β1β4γ1γ4δ1δ4ζ1ζ4θ1θ4κ1κ4λ1λ4μ1μ4ν1ν4π1π4ρ1ρ4ῤ1ῥ4σ1σ4ϲ1ϲ4τ1τ4φ1φ4χ1χ4ψ1ψ4β1ζ4β1θ4β1κ4β1μ4β1ν4β1ξ4β1π4β1σ4β1ϲ4β1τ4β1φ4β1χ4β1ψ4γ1β4γ1ζ4γ1θ4γ1κ4γ1μ4γ1ξ4γ1π4γ1σ4γ1ϲ4γ1τ4γ1φ4γ1χ4γ1ψ4δ1β4δ1γ4δ1ζ4δ1θ4δ1κ4δ1λ4δ1ξ4δ1π4δ1σ4δ1ϲ4δ1τ4δ1φ4δ1χ4δ1ψ4ζ1β4ζ1γ4ζ1δ4ζ1θ4ζ1κ4ζ1λ4ζ1μτζ2μ4ζ1ν4ζ1ξ4ζ1π4ζ1ρ4ζ1σ4ζ1ϲ4ζ1τ4ζ1φ4ζ1χ4ζ1ψ4θ1β4θ1γ4θ1δ4θ1ζ4θ1κ4θ1μσθ2μϲθ2μ4θ1ξ4θ1π4θ1σ4θ1ϲ4θ1τ4θ1φ4θ1χ4θ1ψ4κ1β4κ1γ4κ1δ4κ1ζ4κ1θ4κ1ξ4κ1π4κ1σ4κ1ϲ4κ1φ4κ1χ4κ1ψ4λ1β4λ1γ4λ1δ4λ1ζ4λ1θ4λ1κ4λ1μ4λ1ν4λ1ξ4λ1π4λ1ρ4λ1σ4λ1ϲ4λ1τ4λ1φ4λ1χ4λ1ψ4μ1β4μ1γ4μ1δ4μ1ζ4μ1θ4μ1κ4μ1λ4μ1ξ4μ1π4μ1ρ4μ1σ4μ1ϲ4μ1τ4μ1φ4μ1χ4μ1ψ4ν1β4ν1γ4ν1δ4ν1ζ4ν1θ4ν1κ4ν1λ4ν1μ4ν1ξ4ν1π4ν1ρ4ν1σ4ν1ϲ4ν1τ4ν1φ4ν1χ4ν1ψ4ξ1β4ξ1γ4ξ1δ4ξ1ζ4ξ1θ4ξ1κ4ξ1λ4ξ1μ4ξ1ν4ξ1π4ξ1ρ4ξ1σ4ξ1ϲ4ξ1τ4ξ1φ4ξ1χ4ξ1ψ4π1β4π1γ4π1δ4π1ζ4π1θ4π1κ4π1μ4π1ξ4π1σ4π1ϲ4π1φ4π1χ4π1ψ4ρ1β4ρ1γ4ρ1δ4ρ1ζ4ρ1θ4ρ1κ4ρ1λ4ρ1μ4ρ1ν4ρ1ξ4ρ1π4ρ1σ4ρ1ϲ4ρ1τ4ρ1φ4ρ1χ4ρ1ψ4σ1δ4ϲ1δ4σ1ζ4ϲ1ζ4σ1ν4ϲ1ν4σ1ξ4ϲ1ξ4σ1ρ4ϲ1ρ4σ1ψ4ϲ1ψ4τ1β4τ1γ4τ1δ4τ1θ4τ1ν4τ1ξ4τ1π4τ1φστ2φϲτ2φ4τ1χ4τ1ψ4φ1β4φ1γ4φ1δ4φ1ζ4φ1κ4φ1ν4φ1ξ4φ1π4φ1σ4φ1ϲ4φ1χ4φ1ψ4χ1β4χ1γ4χ1δ4χ1ζ4χ1κ4χ1μ4χ1ξ4χ1π4χ1σ4χ1ϲ4χ1φ4χ1ψ4ψ1β4ψ1γ4ψ1δ4ψ1ζ4ψ1θ4ψ1κ4ψ1λ4ψ1μ4ψ1ν4ψ1ξ4ψ1π4ψ1ρ4ψ1σ4ψ1ϲ4ψ1τ4ψ1φ4ψ1χβγ2κσγ2κϲγ2κσμ2πϲμ2πμν2τσν2τϲν2τ6βρ_6γλ_6κλ_6κτ_6κς_6κϲ_6κσ_6λς_6λϲ_6λσ_6μς_6μϲ_6μσ_6νς_6νϲ_6νσ_6ρς_6ρϲ_6ρσ_6σκ_6ϲκ_6στ_6ϲτ_6τλ_6τρ_6φτ_6χτ_",
|
||||
5 : "ο3ϊ3όο3ϊ3όο3ϊ3ὸβ5ν2τζ5ν2τλ5ν2τρ5ν2τ",
|
||||
6 : "4ρ5γ2μ4ρ5θ2μ4λ5κ2μ4ρ5κ2μ4γ5κ2φ4ν5κ2φ4γ5ξ2τ4ρ5ξ2τ4ρ5φ2ν4ρ5χ2μ4μ5ψ2τ4λ5γ2κ4ν5γ2κ4ρ5γ2κ4τ5γ2κ4ζ5μ2π4λ5μ2π4ν5μ2π4ρ5μ2πἄ5μ2ακἀ5μ2πρὄ5μ2ποὀ5μ2ποὀ5ν2τάὀ5ν2τάὀ5ν2τὰὀ5ν2τᾶ6μ2πλ_6μ2πν_6μ2πρ_",
|
||||
7 : "ἰ5γ2κου_ξε5γ2κ_ξέ5γ2κ_ξέ5γ2κ_σι5γ2κ_ϲι5γ2κἀ5μ2πάκἀ5μ2πάκἀ5μ2πανἀ5μ2πάρἀ5μ2πάρἀ5μ2πᾶρἀ5μ2παρἀρα5μ2πἰ5μ2πρα_κε5μ2π_λό5μ2π_λό5μ2π5μ2πέη_5μ2πέη_5μ2πεη_5μ2πογι_ξε5μ2π_ξέ5μ2π_ξέ5μ2π_ρε5μ2π_ρέ5μ2π_ρέ5μ2π_ρο5μ2πρό5μ2παρό5μ2παρό5μ2περό5μ2περό5μ2πωρό5μ2πωρο5μ2πῶρο5μ2παρο5μ2περο5μ2πωσό5μ2πασό5μ2παϲό5μ2παϲό5μ2πασό5μ2πεσό5μ2πεϲό5μ2πεϲό5μ2πεσο5μ2πῶϲο5μ2πῶσό5μ2πωσό5μ2πωϲό5μ2πωϲό5μ2πωσο5μ2παϲο5μ2πασο5μ2πεϲο5μ2πεσο5μ2πωϲο5μ2πω_τα5μ2π_χα5μ2π_χό5μ2π_χό5μ2π_ξε5ν2τ_ξέ5ν2τ_ξέ5ν2τ6γ2κ1τζ6γ2κ1τσ6γ2κ1τϲ6μ2π1τζ6μ2π1τσ6μ2π1τϲ6τσ5γ2κ6τϲ5γ2κ6τσ5μ2π6τϲ5μ2π6τσ5ν2τ6τϲ5ν2τ",
|
||||
8 : "ἐμι5γ2κρ_μπα5γ2κ_μπι5γ2κ_σπά5γ2κ_σπά5γ2κ_ϲπά5γ2κ_ϲπά5γ2κ_σπα5γ2κ_ϲπα5γ2κ_φιό5γ2κ_φιό5γ2κ_φιο5γ2κἀ6μ3πάριἀ6μ3πάριἀ6μ3παρι_γά5μ2πι_γά5μ2πι_γα5μ2πι_ζεϊ5μ2π_κό5μ2πρ_κό5μ2πρ_κο5μ2πρ_λι5μ2πρ5μ2πέης_5μ2πέης_5μ2πέηϲ_5μ2πέηϲ_5μ2πεης_5μ2πεηϲ_5μ2πέησ_5μ2πέησ_5μ2πεησ__μπι5μ2π_τρο6μ3π_τρό6μ3π_τρό6μ3π_ρου5μ2π_σέ5μ2πρ_σέ5μ2πρ_ϲέ5μ2πρ_ϲέ5μ2πρ_σνο5μ2π_ϲνο5μ2π_σού5μ2π_σού5μ2π_ϲού5μ2π_ϲού5μ2π_σου5μ2π_ϲου5μ2π_τζά5μ2π_τζά5μ2π_τζα5μ2π_τζι5μ2π_τό5μ2πρ_τό5μ2πρ_το5μ2πρ_φρα5μ2πἀ5ν2τάτζἀ5ν2τάτζ_βί5ν2τε_βί5ν2τε_βι5ν2τε_κα5ν2τρ_μαϊ5ν2τ_μπε5ν2τ_μπι5ν2τ_ντα5ν2τ5ν2τίβαν5ν2τίβαν_ρε5ν2τί_ρε5ν2τί_ρε5ν2τι_ροῦ5ν2τ_ρού5ν2τ_ρού5ν2τ_χα5ν2το_χα5ν2τρ_χά5ν2τρ_χά5ν2τρ6γ2κ5μ2π6γ2κ5ν2τ6μ2π5ν2τ6ν2τ5μ2π",
|
||||
9 : "5γ2κραντ_ἴντρι5γ2κἰντρι5γ2κ_μα5γ2κιό_μα5γ2κιό_ντά5γ2κλ_ντά5γ2κλ_ντα5γ2κλἀλα5μ2πουἀρλού5μ2πἀρλού5μ2πἀρλοῦ5μ2πἀρλου5μ2π_βό5μ2πιρ_βό5μ2πιρ_βο5μ2πιρ_κα5μ2πάδ_κα5μ2πάδ_κα5μ2πίν_κα5μ2πίν_κα5μ2πῖν_κα5μ2πιν_κά5μ2ποτ_κά5μ2ποτ_κα5μ2πότ_κα5μ2πότ_κα5μ2ποτ_καου5μ2π_καρα5μ2π5μ2πα5μ2π5μ2πά5μ2π5μ2πά5μ2π5μ2πέ5μ2π5μ2πέ5μ2π5μ2πε5μ2π_νό5μ2πελ_νό5μ2πελ_νο5μ2πελ_ντό5μ2πρ_ντό5μ2πρ_ντο5μ2πρ_σα2μ5ποτ_ϲα2μ5ποτ_τε5μ2πεσ_τε5μ2πεϲ_τζου5μ2π_τσά5μ2πα_τσά5μ2πα_τϲά5μ2πα_τϲά5μ2πα_τσα5μ2πα_τϲα5μ2παἀτρα5ν2τέἀτρα5ν2τέἀτρα5ν2τὲ_γιβε5ν2τ_γκάι5ν2τ_γκάι5ν2τ_γκάϊ5ν2τ_γκάϊ5ν2τ_γκαϊ5ν2τ_κα5ν2ταΐ_κα5ν2ταΐ_κα5ν2ταϊ_μα5ν2τάμ_μα5ν2τάμ_μα5ν2τὰμ_μα5ν2ταμ_μα5ν2τέμ_μα5ν2τέμ_μα5ν2τεμ_μεϊ5ν2τά_μεϊ5ν2τά_μεϊ5ν2τα_μο5ν2τέλ_μο5ν2τέλ_μο5ν2τελμο5ν2τέρνμο5ν2τέρνμο5ν2τερν_νισα5ν2τ_νιϲα5ν2τ_ρεζε5ν2τ_σε5ν2τέφ_σε5ν2τέφ_ϲε5ν2τέφ_ϲε5ν2τέφ_σε5ν2τεφ_ϲε5ν2τεφ_σε5ν2τοῦ_ϲε5ν2τοῦ_σε5ν2τού_σε5ν2τού_ϲε5ν2τού_ϲε5ν2τού_σε5ν2του_ϲε5ν2του_τσα5ν2τί_τσα5ν2τί_τϲα5ν2τί_τϲα5ν2τί_τσα5ν2τι_τϲα5ν2τι",
|
||||
10 : "_γιου5γ2κο_καρα5γ2κι_χούλι5γ2κ_χούλι5γ2κ_χουλι5γ2κ_γιαρα5μ2π_καλα5μ2πα_καλί5μ2πρ_καλί5μ2πρ_καλι5μ2πρ_κα5μ2παρέ_κα5μ2παρέ_κα5μ2παρὲ_κα5μ2παρε_καρνα5μ2π_κολι5μ2πρ_κου5μ2πού_κου5μ2πού_κου5μ2ποῦ_κου5μ2που5μ2πέηδες_5μ2πέηδες_5μ2πέηδεϲ_5μ2πέηδεϲ_5μ2πέηδεσ_5μ2πέηδεσ_5μ2πέηδων_5μ2πέηδων__μπό5μ2πιρ_μπό5μ2πιρ_μπο5μ2πιρ_μπο5μ2πότ_μπο5μ2πότ_μπο5μ2ποτ_σκα5μ2παβ_ϲκα5μ2παβ_ταβλα5μ2π_τζανα5μ2π_τρα5μ2πάλ_τρα5μ2πάλ_τρα5μ2παλ_φά5μ2πρικ_φά5μ2πρικ_φα5μ2πρικ_μπαλά5ν2τ_μπαλά5ν2τ_μπαλα5ν2τ_μπα5ν2ταν_μπου5ν2τα_μπου5ν2τρ",
|
||||
11 : "_καρα6μ3πόλ_καρα6μ3πόλ_καρα6μ3πολ_κολού5μ2πρ_κολού5μ2πρ_κολοῦ5μ2πρ_κολου5μ2πρ_κο6μ3πρέσσ_κο6μ3πρέσσ_κο6μ3πρέϲϲ_κο6μ3πρέϲϲ_κο6μ3πρεσσ_κο6μ3πρεϲϲ_κοντρα5μ2π_κωλού5μ2πρ_κωλού5μ2πρ_κωλοῦ5μ2πρ_κωλου5μ2πρ_μανιτό5μ2π_μανιτό5μ2π_μπα6μ3πάκι_μπα6μ3πάκι_μπα6μ3πακι_ρεπού5μ2πλ_ρεπού5μ2πλ_ρεπου5μ2πλ_τα6μ3περαμ_τα6μ3ποῦρλ_τα6μ3πούρλ_τα6μ3πούρλ_τρα5μ2ποῦκ_τρα5μ2πούκ_τρα5μ2πούκ_τρα5μ2πουκ_τσι5μ2πούκ_τσι5μ2πούκ_τϲι5μ2πούκ_τϲι5μ2πούκ_τσι5μ2πουκ_τϲι5μ2πουκ_τσι5μ2πούσ_τσι5μ2πούσ_τϲι5μ2πούϲ_τϲι5μ2πούϲ_τσι5μ2πουσ_τϲι5μ2πουϲ_γιαχου5ν2τ",
|
||||
12 : "_σαλτιπά5γ2κ_σαλτιπά5γ2κ_ϲαλτιπά5γ2κ_ϲαλτιπά5γ2κ_κουλού5μ2πρ_κουλού5μ2πρ_κουλοῦ5μ2πρ_κουλου5μ2πρ_μπου5μ2πούν_μπου5μ2πούν_μπου5μ2ποῦν_μπου5μ2πουν_χοντρο5μ2πα_λικβι5ν2ταρ_ντερμπε5ν2τ_ντου5ν2τούκ_ντου5ν2τούκ_ντου5ν2τοῦκ_ντου5ν2τουκ_φαστφου5ν2τ_φαϲτφου5ν2τ",
|
||||
13 : "_μπασκε2τ5μ2π_μπαϲκε2τ5μ2π_μπασι5μ2πουζ_μπαϲι5μ2πουζ"
|
||||
}
|
||||
};
|
18
resources/viewer/hyphenate/patterns/en-gb.js
Normal file
21
resources/viewer/hyphenate/patterns/en-us.js
Normal file
@ -1,17 +1,17 @@
|
||||
Hyphenator.languages.fi = {
|
||||
Hyphenator.languages['fi'] = {
|
||||
leftmin : 2,
|
||||
rightmin : 2,
|
||||
shortestPattern : 2,
|
||||
longestPattern : 7,
|
||||
specialChars : 'öäå',
|
||||
specialChars : "öäå",
|
||||
patterns : {
|
||||
3 : '1ba1be1bi1bo1bu1by1da1de1di1do1du1dy1dä1dö1fa1fe1fi1fo1fu1fy1ga1ge1gi1go1gu1gy1gä1gö1ha1he1hi1ho1hu1hy1hä1hö1ja1je1ji1jo1ju1jy1jä1jö1ka1ke1ki1ko1ku1ky1kä1kö1la1le1li1lo1lu1ly1lä1lö1ma1me1mi1mo1mu1my1mä1mö1na1ne1ni1no1nu1ny1nä1nö1pa1pe1pi1po1pu1py1pä1pö1ra1re1ri1ro1ru1ry1rä1rö1sa1se1si1so1su1sy1sä1sö1ta1te1ti1to1tu1ty1tä1tö1va1ve1vi1vo1vu1vy1vä1vöä2yo1yö2ya1äa1öo1äo1öä2äö2öä2öö2ä_ä2u2sb2lb2rd2rf2lf2rg2lg2rk2lp2lp2rc2lq2v',
|
||||
4 : 'y1a2y1o2u1y2y1u2ö3a2ö3o2ä3a2ä3o2ä1u2ö1u2u1ä2u1ö2e1aai1aao1aau1aau1eea1uui1uue1uuo1uuää1iää1eää3yi1ääe1ääy1ääi1ööa1eia1oie1aii1auy1eiai1aai1eai1oai1uau1aau1eeu1aie1aie1oie1yiu1aiu1eiu1ooi1aoi1eoi1ooi1uo1uiou1eou1oue1aui1euo1auo1ue1ö2ö1e2r2asl2as1k2vsc2hts2h',
|
||||
5 : '1st2raa1i2aa1e2aa1o2aa1u2ee1a2ee1i2ee1u2ee1y2ii1a2ii1e2ii1o2uu1a2uu1e2uu1o2uu1i2io1a2io1e2keus11b2lo1b2ri1b2ro1b2ru1d2ra1f2la1f2ra1f2re1g2lo1g2ra1k2ra1k2re1k2ri1k2va1p2ro1q2vich2r',
|
||||
6 : '1sp2lialous1rtaus1perus12s1ase2s1apuulo2s1bib3li',
|
||||
7 : 'yli1o2pali1a2v2s1ohje1a2sian1a2siat1a2sioi2s1o2sa2n1o2sa_ydi2n12n1otto2n1oton2n1anto2n1anno2n1aika2n1a2jo2s1a2jo',
|
||||
8 : '2s1a2sia2n1o2pet2s1a2loialkei2s12n1e2dus2s1ajatu2s1y2rit2s1y2hti2n1a2jan2n1o2mai2n1y2lit2s1a2len2n1a2len',
|
||||
9 : '2s1o2pisk2n1o2pist2s1o2pist2s1i2dea_2s1i2dean2s1e2sity_suu2r1a2',
|
||||
11 : '1a2siaka2s1'
|
||||
3 : "1ba1be1bi1bo1bu1by1da1de1di1do1du1dy1dä1dö1fa1fe1fi1fo1fu1fy1ga1ge1gi1go1gu1gy1gä1gö1ha1he1hi1ho1hu1hy1hä1hö1ja1je1ji1jo1ju1jy1jä1jö1ka1ke1ki1ko1ku1ky1kä1kö1la1le1li1lo1lu1ly1lä1lö1ma1me1mi1mo1mu1my1mä1mö1na1ne1ni1no1nu1ny1nä1nö1pa1pe1pi1po1pu1py1pä1pö1ra1re1ri1ro1ru1ry1rä1rö1sa1se1si1so1su1sy1sä1sö1ta1te1ti1to1tu1ty1tä1tö1va1ve1vi1vo1vu1vy1vä1vöä2yo1yö2ya1äa1öo1äo1öä2äö2öä2öö2ä_ä2u2sb2lb2rd2rf2lf2rg2lg2rk2lp2lp2rc2lq2v",
|
||||
4 : "y1a2y1o2u1y2y1u2ö3a2ö3o2ä3a2ä3o2ä1u2ö1u2u1ä2u1ö2e1aai1aao1aau1aau1eea1uui1uue1uuo1uuää1iää1eää3yi1ääe1ääy1ääi1ööa1eia1oie1aii1auy1eiai1aai1eai1oai1uau1aau1eeu1aie1aie1oie1yiu1aiu1eiu1ooi1aoi1eoi1ooi1uo1uiou1eou1oue1aui1euo1auo1ue1ö2ö1e2r2asl2as1k2vsc2hts2h",
|
||||
5 : "1st2raa1i2aa1e2aa1o2aa1u2ee1a2ee1i2ee1u2ee1y2ii1a2ii1e2ii1o2uu1a2uu1e2uu1o2uu1i2io1a2io1e2keus11b2lo1b2ri1b2ro1b2ru1d2ra1f2la1f2ra1f2re1g2lo1g2ra1k2ra1k2re1k2ri1k2va1p2ro1q2vich2r",
|
||||
6 : "1sp2lialous1rtaus1perus12s1ase2s1apuulo2s1bib3li",
|
||||
7 : "yli1o2pali1a2v2s1ohje1a2sian1a2siat1a2sioi2s1o2sa2n1o2sa_ydi2n12n1otto2n1oton2n1anto2n1anno2n1aika2n1a2jo2s1a2jo",
|
||||
8 : "2s1a2sia2n1o2pet2s1a2loialkei2s12n1e2dus2s1ajatu2s1y2rit2s1y2hti2n1a2jan2n1o2mai2n1y2lit2s1a2len2n1a2len",
|
||||
9 : "2s1o2pisk2n1o2pist2s1o2pist2s1i2dea_2s1i2dean2s1e2sity_suu2r1a2",
|
||||
11 : "1a2siaka2s1"
|
||||
}
|
||||
};
|
@ -1,27 +1,26 @@
|
||||
Hyphenator.languages.fr = {
|
||||
// The French hyphenation patterns are retrieved from
|
||||
// http://tug.org/svn/texhyphen/trunk/collaboration/repository/hyphenator/
|
||||
Hyphenator.languages['fr'] = {
|
||||
leftmin : 2,
|
||||
rightmin : 2,
|
||||
rightmin : 3,
|
||||
shortestPattern : 1,
|
||||
longestPattern : 14,
|
||||
specialChars : 'âêîôûçœéèàî',
|
||||
//The french hyphenation patterns are retrieved from http://extensions.services.openoffice.org/project/french-dictionary-reform1990
|
||||
//They are under LGPL
|
||||
specialChars : "àâçèéêîïôûœ’'",
|
||||
patterns : {
|
||||
2 : "1j1q1ç",
|
||||
3 : "'a4'e4'i4'o4'u4'y4'â4'è4'é4'ê4'î4'ô4'û4_a4_e4_i4_o4_u4_y4_â4_è4_é4_ê4_î4_ô4_û41ba1be1bi1bo1bu1by1bâ1bè1bé1bê1bî1bô1bû1ca1ce1ci1co1cu1cy1câ1cè1cé1cê1cî1cô1cû1da1de1di1do1du1dy1dâ1dè1dé1dê1dî1dô1dû1fa1fe1fi1fo1fu1fy1fâ1fè1fé1fê1fî1fô1fû1ga1ge1gi1go1gu1gy1gâ1gè1gé1gê1gî1gô1gû1ha1he1hi1ho1hu1hy1hâ1hè1hé1hê1hî1hô1hû1ka1ke1ki1ko1ku1ky1kâ1kè1ké1kê1kî1kô1kû1la1le1li1lo1lu1ly1là1lâ1lè1lé1lê1lî1lô1lû1ma1me1mi1mo1mu1my1mâ1mè1mé1mê1mî1mô1mû1na1ne1ni1no1nu1ny1nâ1nè1né1nê1nî1nô1nû1pa1pe1pi1po1pu1py1pâ1pè1pé1pê1pî1pô1pû1ra1re1ri1ro1ru1ry1râ1rè1ré1rê1rî1rô1rû1sa1se1si1so1su1sy1sâ1sè1sé1sê1sî1sô1sû1ta1te1ti1to1tu1ty1tà1tâ1tè1té1tê1tî1tô1tû1va1ve1vi1vo1vu1vy1vâ1vè1vé1vê1vî1vô1vû1wa1we1wi1wo1wu1za1ze1zi1zo1zu1zy1zè1zé2'22jkn1xé1q",
|
||||
4 : "_1ba_1bi_1ci_1co_1cu_1da_1di_1do_1dy_1dé_1ge_1la_1ma_1mi_1mo_1mé_1no_1pa_1pe_1po_1pu_1pé_1re_1ré_1sa_1se_1so_1su_1sy_1ta1b2l1b2r1c2h1c2k1c2l1c2r1c½01d2r1f2l1f2r1g2l1g2n1g2r1k2h1k2r1m½01n½01p2h1p2l1p2r1r2h1s2h1s½01t2h1t2r1v2r1w2r2chb2chg2chm2chn2chp2chs2cht2chw2ckb2ckf2ckg2ckp2cks2ckt2phn2phs2pht2shm2shr2shs2thl2thm2thn2ths4be_4ce_4ch_4ck_4de_4fe_4ge_4he_4je_4ke_4kh_4le_4me_4ne_4pe_4ph_4re_4se_4sh_4te_4th_4ve_4we_4ze_a1bîa1laa1maa1nea1nia1poa1viab2hac1qad2har1cb1leb1reb1ruc1cic1kec1lac1lec1rec2hac2hec2hic2hoc2huc2hyc2hâc2hèc2héc2hêc2hîc2hôc2hûch2lch2rd1had1hod1led1red1s2e1nif1laf1lef1ref1rif1s2g1leg1neg1rag1reg1s2i1vail2ll1lil1lul1mem1nèm1ném1s2n1sao1pup1hep1hop1lep1lup1nep1rep1rip1rop1rup1rép1syp1tèp1téph2lph2rr1cir1her1hyr1mis1cas1cos1hes1hos1las1los1pas1pes1pis1pos1tas1tes1tis1tos1tys1tét1het1rat1ret1rit1ruth2ru1ciu1niu1viv1reâ1meè1meé1ceé1cié1cué1deé1leé1lié1loé1léé1mié1neé1nié1pié1reô1me",
|
||||
5 : "'a1mi'a1na'a1po'o1vi_1p2l_1p2r_1t2r_a1mi_a1na_a1po_c2hè_con4_cul4_dé2s_o1vi_p1ha_p1lu_p1ro_p1ré_p1sy_pe4r_réu2_s1ta_s1ti_t1ri_é1mi1d2'21g2ha1g2he1g2hi1g2ho1g2hy1p2né4bes_4ces_4des_4fes_4ges_4gue_4hes_4jes_4kes_4les_4mes_4nes_4pes_4que_4res_4ses_4tes_4ves_4wes_4zes_ab1seac1ceai1meal1coan1tiap1paar1mear1mias1meau1meca1pica1rêch1lech1loch1rech1rocil3lco1apco1arco1auco1axco1efco1enco1exco1nuco1é2cy1rid1d2hda1medi1lidé1caer1mees1cees1coes1tifa1mefu1mefé1cugil3lhil3lhu1mehy1pehy1pohé1mihé1moi1b2ri1oxyib1riim1maim1miim1poim1puin1ciin1diin1doin1duin1foin1noin1soin1tein1tiis1ceis1taja1cel1s2tlil3lmi1memil3lmo1nomu1nimé1coo1b2lo1d2lo1g2nob1looc1teog1noom1buom1meom1nior1meos1taos1tios1toos1tépa1lépa1piph1leph1reph1taph1tipi1ripo1lypu1pipu1sipé1nupé1réra1dira1mere1lere1lire1peri1meru1leré1geré1maré1suré1tis1c2ls1p2hs1t2rsc1lésc2hese1mises1qsp1hèsp1hést1rost1rusu1mesu1pesu1rasu1rét1c2ht1t2lta1metc2hith1reth1rito1metu1meté1léue1viva1civa1nive1nivi1divil3lvé1loxil3lys1toé1d2réd1riéd2hiélo1q",
|
||||
6 : "'a1b2r'a1g2n'ab1ré'ag1na'an1ti'ar1ge'ar1pe'as2ta'i1g2n'in1te'in2er'on1gu_1c2h4_1k2h4_1p2h4_1s2h4_1t2h4_a1b2r_a1g2n_ab1ré_ag1na_an1ti_ar1de_ar1ge_ar1pe_as2ta_bi1au_bi1u2_ci1sa_co1o2_cons4_do1le_dy2s3_dé1a2_dé1io_dé1o2_dé1sa_dé1se_dé1so_dé1su_i1g2n_in1te_in2er_la1te_ma1la_ma1le_ma1li_ma1lo_mil3l_mo1no_mé1go_mé1se_mé1su_mé1ta_mé2sa_no1no_on1gu_pa1na_pa1ni_pa1no_pa1ra_pa1re_pa1te_pé1ri_re1s2_res1q_ré1a2_ré1e2_ré1i2_ré1o2_ré1é2_ré2el_ré2er_ré2èr_su1bi_su1bu_su1ri_su1ro_ta1le1m2nès1octet1p2neu1p2tèr1p2tér1s2c2h1s2cop1s2lav1s2lov1s2por2bent_2c1k3h2cent_2dent_2fent_2gent_2jent_2kent_2lent_2nent_2pent_2rent_2s2chs2s3hom2sent_2tent_2vent_2went_2xent_2zent_3d2hal4b4le_4b4re_4c4he_4c4ke_4c4le_4c4re_4d4re_4f4le_4f4re_4g4le_4g4ne_4g4re_4gues_4p4he_4p4le_4p4re_4ques_4r4he_4s4ch_4s4he_4t4he_4t4re_4v4re_abs1tiabî1meani1mear1c2harc2hias1t2ravil4laè1d2rbou1mebou1tibru1mecci1deche1vicla1meco1a2dco1accco1g2ncog1nicom1pécon1ficon1nicon1ticor1pucur1redis1codis1sidis1tidé1t2rdét1rien1t2rent1reeus1taex1t2rfi1c2hfic2hufir1mefri1tihémi1éins1tiisc2hiiva1leiè1d2rl3lionla1w2rllu1memil1lemit1tenu1t2rnut1riopu1leos1t2rost1raost1rioxy1a2oè1d2rpe1r3hper1maper1tipho1toplu1mepri1vapru1depré1sepu1g2npug1napé2nulqua1merai1mercil4lrin1germil4lry1t2hry2thmser1geser1pesla1lospa1tispi1rospo1rusto1mosté1résu1b2lsub1lisub1s2suc1cuta1c2htac2hytan1getem1péter1getes1tator1retri1detru1cutur1buucil4luvil4lvol1tawa2g3néci1meécu1meédri1qéli1meélé1meémil4léni1te",
|
||||
7 : "'ab3réa'ami1no'e1n1a2'e1n1o2'i1n1a2'i1n1e2'i1n1i2'i1n1o2'i1n1u2'i1n1é2'i2g3ni'i2g3né'i2g4no'in2ept'in2i3q'in2i3t'in2ond'in2u3l'in2uit'ina1ni'ini1mi'ino1cu'ins1ta'iné1lu'iné1na'oua1ou_ab3réa_ami1no_bai1se_bi1a2c_bi1a2t_bio1a2_com1me_coo1li_da1c2r_dac1ry_di1ald_di1e2n_di2s3h_dia1ci_dia1to_dé2s1½_dé3s2c_dé3s2p_dé3s2t_e1n1a2_e1n1o2_gem1me_i1n1a2_i1n1e2_i1n1i2_i1n1o2_i1n1u2_i1n1é2_i2g3ni_i2g3né_i2g4no_in2ept_in2i3q_in2i3t_in2ond_in2u3l_in2uit_ina1ni_ini1mi_ino1cu_ins1ta_iné1lu_iné1na_ma1c2r_ma1g2n_ma2c3k_ma2r1x_mac1ro_mag1ni_mag1nu_mil1li_mé2s1i_mé3san_oua1ou_pa1r2h_pen2ta_pha1la_plu1ri_pon1te_pos1ti_pro1é2_pré1a2_pré1e2_pré1i2_pré1o2_pré1s2_pré1u2_pré1é2_pré2au_re2s3s_re2s3t_res1ca_res1ci_res1co_res1pe_res1pi_res1po_res1se_res1ta_res1ti_res1to_res1té_ré1t2r_ré2aux_ré2uss_réa1li_rét1ro_sar1me_ser1me_seu2le_sou1ve_stil3l_su1b2l_su2r3h_sub1li_émi1ne1alcool1s2clér1s2perm1s2phèr1s2phér1s2piel1s2tein1s2tigm1é2drie1é2nerg2chent_2guent_2phent_2quent_2r3heur2shent_2t3heur3d2houd3ph2tis4b4les_4b4res_4c4hes_4c4kes_4c4les_4c4res_4ch4le_4ch4re_4d4res_4f4les_4f4res_4g4les_4g4nes_4g4res_4p4hes_4p4les_4p4res_4ph4le_4ph4re_4r4hes_4s4hes_4t4hes_4t4res_4th4re_4v4res_amal1gaanti1feappa1rearmil5lcapil3lcarê1mechlo1rachlo1réchro1meco1ac1qco2nurbcoas1socoas1sucyril3ldia1p2hdiaph2rdili1gedéca1dee2s3c2he2s3copesti1meext1ra1extra2cextra2iflam1mefécu1legram1megran1dihype4r1hypers2hypo1a2hypo1e2hypo1i2hypo1o2hypo1s2hypo1u2hypo1é2i1al1gii1s2c2hi1s2tatiar1t2hibril3limma1neimmi1neimpo1teimpu1deinci1deindi1geindo1leinno1ceinso1leinti1meio1a2ctl2ment_la2w3remil4letmon1t2rmono1vamont1rémoye1nâmuni1fin3s2at_o1io1nio1s2taso1s2tato1s2timo1s2tomogno1moomni1poomni1s2papil2lpiril3lpoly1a2poly1e2poly1i2poly1o2poly1s2poly1u2poly1vapoly1è2poly1é2pros1taproé1mipréé1mipupil3lpusil3lreli1meryth1meréma1neréti1cesemil4lstan1dasu3r2ahsupe4r1supers2suré1mither1mothril3ltung2s3télé1e2télé1i2télé1s2u2s3t2ruevil4luni1a2xuni1o2vvacil4lvanil2lven1t2rveni1mevent1rividi1mey1al1giy1s2tomâ2ment_è2ment_é3cent_é3dent_é3rent_
épis1coéqui1poéqui1vaô2ment_",
|
||||
8 : "'a2g3nat'anti1a2'anti1e2'anti1s2'anti1é2'eu2r1a2'inau1gu'inef1fa'inte4r3'inters2'ovi1s2c_1s2c2h4_a2g3nat_anti1a2_anti1e2_anti1s2_anti1é2_bi2s1a2_chè1v2r_chèv1re_con1t2r_cont1re_di1a2cé_di1a2mi_dy2s1a2_dy2s1i2_dy2s1o2_dy2s1u2_dé2s1i2_dé2s1é2_dés2a3m_désa1te_dési1ne_déso1pi_eu2r1a2_inau1gu_inef1fa_inte4r3_inters2_ma2l1ap_ma2l1en_ma2l1oc_mono1a2_mono1e2_mono1i2_mono1o2_mono1s2_mono1u2_mono1é2_mé2g1oh_mé2s1es_ovi1s2c_pa2n1is_pa2r3hé_para1s2_pe1r1a2_pe1r1e2_pe1r1i2_pe1r1o2_pe1r1u2_pe1r1é2_pluri1a_pon2tet_pos2t3h_pos2t3r_post1s2_pro1g2n_prog1na_psyc2ho_pud1d2l_péri1os_péri1s2_péri1u2_re3s4tu_re3s4ty_res1c2r_res1p2l_resp1le_rest1re_rest1ri_ré2a3le_ré2i3fi_sta2g3n_su2b1a2_su2b1in_su2b1ur_su2b1é2_su2b3lu_su2r1a2_su2r1e2_su2r1of_su2r1ox_su2r1é2_su3r2et_syn1g2n_syng1na_tri1a2c_tri1a2n_tri1a2t_tri1o2n1m2né1mo1m2né1si1s2patia1s2piros1s2tomos1s2ty1le2b2lent_2b2rent_2c2kent_2c2lent_2c2rent_2d2lent_2d2rent_2f2lent_2f2rent_2g2lent_2g2nent_2g2rent_2p2lent_2p2rent_2t2rent_2v2rent_4ch4les_4ch4res_4ph4les_4ph4res_4s4c4he_4th4res_a1è2d1rea2s3t1roab3sent_absti1neac3cent_ai2ment_anes1t2hanest1héar2ment_as2ment_au2ment_boutil3lca3ou3t2chevil4lchien1deco1assocco1assurcompé1teconfi1deconni1veconti1necorpu1leda2ment_di2s3copdiaph1radissi1dedistil3ldétri1meentre1geer2ment_es3cent_eu1s2tatfa2ment_fichu1mefritil3lfu2ment_hu2ment_hype1ra2hype1re2hype1ri2hype1ro2hype1ru2hype1ré2hémo1p2ti1arth2ri1è2d1rei2s3c2héi2s3chiai2s3chioimmis1ceindul1geinfor1mainstil3lintel1liis3cent_ja3cent_mi2ment_mécon1ten3s2ats_nutri1meo1s2trado1è2d1reo2b3longom2ment_ombud2s3or2ment_paléo1é2papil1lopapil3lapapil3lepapil3liperma1neperti1nephoto1s2poas1t2rpu2g3nacpé1r2é2qra2ment_radio1a2re3lent_re3pent_ri2ment_ru3lent_ré3gent_résur1geslalo1mesporu1lesu2ment_subli1mesuccu1lesupe1ro2ta2ment_tachy1a2tchin3t2tempé1ratesta1meto2ment_tran2s3htran2s3ptrans1patrucu1letu2ment_turbu1letélé1o2btélé1o2pvanil1liy1as1t2hé3quent_",
|
||||
9 : "'ae3s4c2h'apo2s3ta'ar3gent_'ar3pent_'in1s2tab'in2a3nit'in2augur'in2effab'in2o3cul'inte1ra2'inte1re2'inte1ri2'inte1ro2'inte1ru2'inte1ré2_ae3s4c2h_apo2s3ta_ar3dent__ar3gent__ar3pent__baise1ma_ci2s1alp_co2o3lie_di1a2cid_di1a2tom_do3lent__dé2s1u2n_dé3s2ert_dé3s2exu_dé3s2i3d_dé3s2i3r_dé3s2ist_dé3s2o3l_dé3s2orm_dé3s2orp_désen1si_in1s2tab_in2a3nit_in2augur_in2effab_in2o3cul_inte1ra2_inte1re2_inte1ri2_inte1ro2_inte1ru2_inte1ré2_la3tent__ma2g3num_ma2l1a2v_ma2l1int_ma2l1o2d_magni1ci_magni1fi_mala1d2r_malad1re_milli1am_mé2s1u2s_no2n1obs_pa2n1a2f_pa2n1opt_pa3rent__pa3tent__para1c2h_pos2t1in_pos2t1o2_pro1s2cé_prou3d2h_pré2a3la_péri2s3s_re2s3cap_re2s3cou_re2s3pir_re3s4t2r_re3s4tab_re3s4tag_re3s4tat_re3s4tim_re3s4tip_re3s4toc_re3s4top_re3s4tén_re3s4tér_ré2a3lis_ré2a3lit_rétro1a2_su2b3lin_su2r1i2m_su2r1inf_su2r1int_su3b2alt_su3b2é3r_su3r2a3t_su3r2eau_su3r2ell_subli1mi_ta3lent_1informat1p2sy1c2h1s2ca1p2h1s2to1c2k1é2drique1é2lément2r3hy1d2r3ph2ta1lé4s4c4hes_a2l1al1giabî2ment_amalga1meani2ment_apo2s3t2rarchié1pibou2ment_bru2ment_cci3dent_cla2ment_contin1gecur3rent_e2n1i2v2rfir2ment_grandi1loiva3lent_llu2ment_mit3tent_monova1lemunifi1ceo1s2té1roo2g3no1siomnipo1teopu3lent_ostric1tipapil3lomplu2ment_po1ast1repolyva1leprivat1dopro2s3tatproémi1nepru3dent_pré3sent_préémi1nepugna1b2lqua2ment_rai3ment_rin3gent_ser3gent_ser3pent_sesqui1a2stéréo1s2surémi1netan3gent_ter3gent_thermo1s2tor3rent_tran2s1a2tran2s1o2tran2s1u2tri3dent_vanil3linvanil3lisventri1povol2t1ampvélo1s2kiéci2ment_écu2ment_éli2ment_éni3tent_épi2s3copéquipo1teéquiva1le",
|
||||
10 : "'amino1a2c'ana3s4t2r'in2exo1ra'on3guent__1p2sy1c2h_amino1a2c_ana3s4t2r_chèvre1fe_com3ment__contre1ma_dacryo1a2_dé3s2i3li_gem2ment__in2exo1ra_macro1s2c_mono1ï2dé_on3guent__pa2n1a2mé_pa2n1a2ra_péri2s3ta_re2s3c1ri_re2s3pect_re2s3pons_re2s3quil_re3s4tand_re4s5trin_res3sent__sar3ment__ser3ment__sou3vent__émi3nent_1s2tandard1s2tro1p2h1s2truc1tu1é2lec1t2racquies1ceantifer1meappa3rent_carê2ment_chlo2r3a2cchlo2r3é2tchro2ment_co2g3ni1tidili3gent_déca3dent_esti2ment_flam2ment_fécu3lent_gram2ment_grandilo1qimma3nent_immi3nent_impo3tent_impu3dent_inci3dent_indi3gent_indo3lent_inno3cent_inso3lent_intelli1geinti2ment_mon2t3réalmoye2n1â2go1s2tra1tureli2ment_ryth2ment_réma3nent_réti3cent_tempéra1metran3s2acttran3s2atstranspa1reveni2ment_vidi2ment_élo3quent_",
|
||||
11 : "'anti2en1ne'in2i3mi1ti_1dé3s2o3dé_anti2en1ne_contre1s2c_dé3s2a3c2r_dé3s2ensib_dé3s2i3g2n_dé3s2i3nen_dé3s2in1vo_dé3s2o3pil_dé3s2é3g2r_in2i3mi1ti_ma2l1ai1sé_magnifi1ca_mé1ta1s2ta_pa2n1o2p2h_phalan3s2t_psycho1a2n_re2s3ci1si_re2s3ci1so_re2s3plend_re4s5trein_re4s5trict_su2b3liminabsti3nent_archi1é2pischien3dent_compé3tent_confi3dent_conni3vent_conti3nent_corpu3lent_diaphrag1medissi3dent_détri3ment_entre3gent_fichu3ment_immis4cent_indul3gent_mécon3tent_nutri3ment_o2g3nomo1niperma3nent_perti3nent_privatdo1ceprivatdo1zepu2g3nab1lerésur3gent_slalo2ment_sporu4lent_subli2ment_succu3lent_testa3ment_trucu3lent_turbu3lent_ventripo1teépi3s4co1pe",
|
||||
12 : "'in2é3luc1ta'in2é3nar1ra_bai2se3main_dé3s2a3tell_dé3s2as1t2r_dé3s2ou1f2r_in2é3luc1ta_in2é3nar1ra_ma2l1a2d1ro_ma2l1a2dres_pa2r1a2c2he_pa2r1a2c2hè1a2nesthé1siamalga2ment_contin3gent_monova3lent_munifi3cent_o1s2trictionomnipo3tent_polyva3lent_proémi3nent_préémi3nent_surémi3nent_équipo3tent_équiva4lent_",
|
||||
13 : "_ma2g3nici1de_ma2g3nificat_pro2g3na1t2h_syn2g3na1t2hacquies4cent_antifer3ment_intelli3gent_tempéra3ment_transpa3rent_",
|
||||
14 : "_chèvrefeuil2l_contremaî1t2rdiaphrag2ment_privatdo3cent_privatdo3zent_ventripo3tent_",
|
||||
2 : "1ç1j1q",
|
||||
3 : "1gè’â41zu1zo1zi1zè1zé1ze1za’y4_y41wu1wo1wi1we1wa1vy1vû1vu1vô1vo1vî1vi1vê1vè1vé1ve1vâ1va’û4_û4’u4_u41ba1bâ1ty1be1bé1bè1bê1tû1tu1tô1bi1bî1to1tî1ti1tê1tè1té1te1tà1tâ1ta1bo1bô1sy1sû1su1sœ1bu1bû1by2’21ca1câ1sô1ce1cé1cè1cê1so1sî1si1sê1sè1sé1se1sâ1sa1ry1rû1ru1rô1ro1rî1ri1rê1rè1ré1re1râ1ra’a41py1pû1pu1pô1po1pî1pi1pê1pè1pé1pe1pâ1pa_ô41ci1cî’ô4’o4_o41nyn1x1nû1nu1nœ1nô1no1nî1ni1nê1nè1né1ne1nâ1co1cô1na1my1mû1mu1mœ1mô1mo1mî1mi1cœ1mê1mè1mé1me1mâ1ma1ly1lû1lu1lô1lo1lî1li1lê1lè1cu1cû1cy1lé1d’1da1dâ1le1là1de1dé1dè1dê1lâ1la1ky1kû1ku1kô1ko1kî1ki1kê1kè1ké1ke1kâ1ka2jk_a4’î4_î4’i4_i41hy1hû1hu1hô1ho1hî1hi1hê1hè1hé1he1hâ1ha1gy1gû1gu1gô1go1gî1gi1gê_â41gé1ge1gâ1ga1fy1di1dî1fû1fu1fô1fo’e41fî1fi1fê1fè1do1dô1fé1fe1fâ1fa’è41du1dû1dy_è4’é4_é4’ê4_ê4_e41zy",
|
||||
4 : "1f2lab2h2ckg2ckp2cksd1s22ckb4ck_1c2k2chw4ze_4ne_2ckt1c2lad2hm1s22cht2chsch2r2chp4pe_1t2r1p2h_ph44ph_ph2l2phnph2r2phs1d2r2pht2chn4fe_2chm1p2l1p2r4me_1w2rch2l2chg1c2r2chb4ch_1f2r4le_4re_4de_f1s21k2r4we_1r2h_kh44kh_1k2h4ke_1c2h_ch44ge_4je_4se_1v2r_sh41s2h4ve_4sh_2shm2shr2shs4ce_il2l1b2r4be_1b2l4he_4te__th41t2h4th_g1s21g2r2thl1g2l2thm2thnth2r1g2n2ths2ckf",
|
||||
5 : "2ck3h4rhe_4kes_4wes_4res_4cke_éd2hi4vre_4jes_4tre_4zes_4ges_4des_i1oxy4gle_d1d2h_cul44gne_4fre_o1d2l_sch44nes_4les_4gre_1s2ch_réu24sch_4the_1g2hy4gue_2schs4cle_1g2ho1g2hi1g2he4ses_4tes_1g2ha4ves_4she_4che_4cre_4ces_t1t2l4hes_l1s2t4bes_4ble__con4xil3lco1ap4que_vil3l4fle_co1arco1exco1enco1auco1axco1ef4pes_co1é2per3h4mes__pe4r4bre_4pre_4phe_1p2né4ple__dé2smil3llil3lhil3l4dre_cil3lgil3l4fes_",
|
||||
6 : "’in1o2rcil4l4phre_4dres_l3lioni1algi2fent_émil4l4phle_rmil4l4ples_4phes_1p2neuextra14pres_y1asthpé2nul2xent__mé2sa2pent_y1algi4chre_1m2nès4bres_1p2tèr1p2tér4chle_’en1o24fles_oxy1a2avil4l_en1o24ques_uvil4lco1a2d4bles__in1a2’in1a21s2por_cons4_bi1u2’as2ta_in1e2’in1e2_in1é2’in1é21s2lov1s2lavco1acq2cent__as2ta_co1o24ches_hémi1é_in2er’in2er2s3homo1ioni_in1i2’in1i22went_4shes__ré1a2_ré1é2_ré1e2_ré2el_in1o2ucil4lco1accu2s3tr_ré2er_ré2èr4cles_2vent__ré1i22sent_2tent_2gent__ré1o24gues__re1s24sche_4thes_’en1a2e2s3ch4gres_1s2cop2lent__en1a22nent__in1u2’in1u24gnes_4cres_wa2g3n4fres_4tres_4gles_1octet_dé1o2_dé1io4thre__bi1au2jent__dé1a22zent_4vres_2dent_4ckes_4rhes__dy2s3sub1s22kent_2rent_2bent_3d2hal",
|
||||
7 : "a2g3nos3d2houdé3rent__dé3s2t_dé3s2pé3dent_2r3heur2r3hydri1s2tat2frent_io1a2ctla2w3re’in2u3l_in2u3l2crent_’in2uit_in2uit1s2caph1s2clér_ré2ussi2s3ché_re2s3t_re2s3s4sches_é3cent__seu2le’in2ond_in2ond’in2i3t_in2i3t’in2i3q_ré2aux_in2i3q2shent__di1alduni1a2x’in2ept2flent__in2eptuni1o2v2brent_co2nurb2chent_2quent_1s2perm1s2phèr_ma2c3kuevil4l1s2phér1s2piel1s2tein1s2tigm4chles_1s2tock1s2tyle1p2sych_pro1é2_ma2r1x_stil3lpusil3libril3lcyril3l_pré1s2thril3l_mé3san_pré1u2_mé2s1i_pré1o2_pré1i2piril3lpupil3lâ2ment__pré1e2_pré1é2_pré2au_pré1a22prent_2vrent_supero2_di1e2npoly1u2è2ment_poly1s2poly1o2poly1i2poly1è2poly1é2poly1e2poly1a2supe4r1capil3l2plent_armil5lsemil4lmil4letvacil4l_di2s3h3ph2tis2dlent_a2s3tro4phres_l2ment_i1è2drei1arthr2drent_4phles_supers2ô2ment_extra2i2phent_su3r2ah_su2r3hextra2chypo1u21alcool_per1u2_per1o2_per1i2_per1é2hypo1s2_per1a2hypo1o2hypo1i2hypo1é2_pen2tahypo1e2hypo1a2y1s2tome2s3cophyperu2hype4r1hypers2hypero21m2némohyperi21m2nési4chres_a1è2drehyperé2hypere2hypera2’oua1ou_oua1ouo1s2tomo1s2timo1s2tato1s2tasomni1s2tung2s3_dé3s2c2blent__bio1a2télé1e2télé1i22clent_télé1s22guent_1é2nerg2grent_2trent__dé2s1œ2t3heuro1è2dre2gnent_2glent_4thres__bi1a2t1é2drie_bi1a2c_i2g3nin3s2at_’i2g3ni2ckent__i2g3né’ab3réa’i2g3né_ab3réa_per1e2",
|
||||
8 : "_ma2l1ap_dy2s1u2_dy2s1o2_dy2s1i2n3s2ats__dy2s1a2distil3l1é2lectrinstil3l1s2trophe2n1i2vro2b3long1s2tomos_ae3s4ch’ae3s4ch_eu2r1a2ombud2s3’eu2r1a2_mono1s2_mono1u2o1s2téro_mono1o2eu1s2tato1s2tradfritil3la2l1algi_mono1i2_mono1é2_ovi1s2c’ovi1s2c_mono1e2_mono1a2co1assocpaléo1é2boutil3l1s2piros_ré2i3fi_pa2n1ischevil4l1s2patiaca3ou3t2_di1a2cé_para1s2_pa2r3héco1assur_su2b1é2tu2ment_su2ment__su2b1in_su2b3lupapil3lire3pent_’inte4r3_su2b1urab3sent__su2b1a2di2s3cophu2ment_fu2ment__intera2au2ment_as2ment_or2ment_’intera2_intere2pé1r2é2q_péri1os_péri1s2ja3cent__anti1a2_péri1u2’anti1a2er2ment__anti1e2ac3cent_ar2ment_to2ment_’intere2ré3gent_papil3leom2ment_’anti1e2photo1s2_anti1é2_interé2’anti1é2_anti1s2’anti1s23ph2talé’interé2ri2ment__interi2’interi2mi2ment_apo2s3tri2s3chio_pluri1ai2s3chia_intero2’intero2_inte4r3po1astre_interu2’interu2_inters2ai2ment_’inters2papil3la_tri1o2n_su2r1a2_pon2tet_pos2t3h_dés2a3mes3cent__pos2t3r_post1s2_tri1a2tta2ment__tri1a2nra2ment_is3cent__su2r1e2_tri1a2cfa2ment_da2ment__su3r2et_su2r1é2_mé2s1es_mé2g1oh_su2r1of_su2r1ox_re3s4ty_re3s4tu_ma2l1oc’a2g3nat_dé2s1é2_ma2l1entachy1a2_pud1d2ltchin3t2_re3s4trtran2s3p_bi2s1a2tran2s3hhémo1p2té3quent__a2g3nat_dé2s1i2télé1o2bo2g3nosiradio1a2télé1o2ppu2g3nacru3lent__sta2g3nre3lent__ré2a3le_di1a2mi",
|
||||
9 : "_ré2a3lit_dé3s2o3lthermo1s2_dé3s2ist_dé3s2i3rmit3tent_éni3tent__do3lent__ré2a3lisopu3lent__pa3tent__re2s3cap_la3tent__co2o3lie_re2s3cou_re2s3cri_ma2g3num_re2s3pir_dé3s2i3dco2g3nititran2s1a2tran2s1o2_dé3s2exu_re3s4tab_re3s4tag_dé3s2ert_re3s4tat_re3s4tén_re3s4tér_re3s4tim_re3s4tip_re3s4toc_re3s4toptran2s1u2_no2n1obs_ma2l1a2v_ma2l1int_prou3d2hpro2s3tativa3lent__ta3lent__rétro1a2_pro1s2cé_ma2l1o2dcci3dent__pa3rent__su2r1int_su2r1inf_su2r1i2mtor3rent_cur3rent__mé2s1u2stri3dent__dé3s2orm_su3r2ell_ar3dent__su3r2eaupru3dent__pré2a3lacla2ment__su3r2a3t_pos2t1o2_pos2t1inqua2ment_ter3gent_ser3gent_rai3ment_abî2ment_éci2ment_’ar3gent__ar3gent_rin3gent_tan3gent_éli2ment_ani2ment_’apo2s3ta_apo2s3tavélo1s2kivol2t1amp_dé3s2orp_dé2s1u2n_péri2s3ssesqui1a2’ana3s4trfir2ment_écu2ment_ser3pent_pré3sent_’ar3pent__ar3pent_’in1s2tab_in1s2tab’in2o3cul_in2o3culplu2ment_bou2ment_’in2exora_in2exora_su2b3linbru2ment__su3b2é3r_milli1am’in2effab_in2effab’in2augur_di1a2cid_in2augur_pa2n1opt’in2a3nit_in2a3nit1informat_ana3s4trvanil3lis_di1a2tom_su3b2altvanil3linstéréo1s2_pa2n1a2fo1s2tratuépi2s3cop_ci2s1alp1s2tructu1é2lément1é2driquepapil3lomllu2ment_",
|
||||
10 : "1s2tandardimmi3nent__émi3nent_imma3nent_réma3nent_épi3s4cope_in2i3miti’in2i3miti_res3sent_moye2n1â2gréti3cent__dé3s2a3crmon2t3réalinno3cent__mono1ï2dé_pa2n1a2méimpu3dent__pa2n1a2ra_amino1a2c’amino1a2c_pa2n1o2phinci3dent__ser3ment_appa3rent_déca3dent__dacryo1a2_dé3s2astr_re4s5trin_dé3s2é3gr_péri2s3ta_sar3ment__dé3s2oufr_re3s4tandchro2ment__com3ment__re2s3quil_re2s3pons_gem2ment__re2s3pect_re2s3ciso_dé3s2i3gn_dé3s2i3ligram2ment__dé3s2invo_re2s3cisitran3s2act’anti2enneindo3lent__sou3vent_indi3gent_dili3gent_flam2ment_impo3tent_inso3lent_esti2ment_’on3guent__on3guent_inti2ment__dé3s2o3défécu3lent_veni2ment_reli2ment_vidi2ment_chlo2r3é2tpu2g3nablechlo2r3a2cryth2ment_o2g3nomonicarê2ment__méta1s2ta_ma2l1aisé_macro1s2célo3quent_tran3s2ats_anti2enne",
|
||||
11 : "_contre1s2cperti3nent_conti3nent__ma2l1a2dro_in2é3lucta_psycho1a2n_dé3s2o3pil’in2é3luctaperma3nent__in2é3narratesta3ment__su2b3liminrésur3gent_’in2é3narraimmis4cent__pro2g3nathchien3dent_sporu4lent_dissi3dent_corpu3lent_archi1é2pissubli2ment_indul3gent_confi3dent__syn2g3nathtrucu3lent_détri3ment_nutri3ment_succu3lent_turbu3lent__pa2r1a2che_pa2r1a2chèfichu3ment_entre3gent_conni3vent_mécon3tent_compé3tent__re4s5trict_dé3s2i3nen_re2s3plend1a2nesthésislalo2ment__dé3s2ensib_re4s5trein_phalan3s2tabsti3nent_",
|
||||
12 : "polyva3lent_équiva4lent_monova3lent_amalga2ment_omnipo3tent__ma2l1a2dreséquipo3tent__dé3s2a3tellproémi3nent_contin3gent_munifi3cent__ma2g3nicideo1s2trictionsurémi3nent_préémi3nent__bai2se3main",
|
||||
13 : "acquies4cent_intelli3gent_tempéra3ment_transpa3rent__ma2g3nificatantifer3ment_",
|
||||
14 : "privatdo3cent_diaphrag2ment_privatdo3zent_ventripo3tent__contre3maître",
|
||||
15 : "grandilo3quent_",
|
||||
16 : "_cont1re3maît1re",
|
||||
17 : "_chè2vre3feuil1le"
|
||||
16 : "_chè2vre3feuille"
|
||||
}
|
||||
};
|