rebase on upstream trunk

Hiroshi Miura 2010-11-24 23:27:53 +09:00
commit bbf166479d
237 changed files with 37034 additions and 21337 deletions

View File

@ -4,6 +4,176 @@
# for important features/bug fixes.
# Also, each release can have new and improved recipes.
- version: 0.7.29
date: 2010-11-19
new features:
- title: "OSX binary build is now based on Qt 4.7. Also, the build is now Intel only and requires at least OS X 10.5.2. If you are on a PowerPC machine or an older OS X version, do not upgrade"
- title: "Content server: Allow direct navigation to a set of books in the book list."
tickets: [7453]
- title: "OS X: When deleting books, put the files into the recycle bin instead of deleting them permanently"
- title: "Add button to easy configure Hotmail as email relay. Also improve usability of easy config buttons"
- title: "Kobo driver: Support Currently_Reading category"
- title: "Catalog generation: Thumbnail caching, wishlist, improved description layout."
tickets: [7376]
- title: "Support for the Cybook Orizon"
bug fixes:
- title: "Fix restore to defaults in preferences incorrectly setting PDF unwrap factor to 0.0"
- title: "PDF Input: Fix unwrapping of accented characters"
- title: "Do not display dialogs asking for confirmation or showing conversion errors when calibre is minimized to system tray"
tickets: [7549]
- title: "calibre server: Fix regression that broke digest authentication when the calibre interface language was set to non English"
- title: "EPUB Output: Do not raise an error for invalid embedded fonts in the input document."
tickets: [7567]
- title: "RTF Input: Improved conversion of tables, with support for border styles on table cells"
- title: "E-book viewer: Fix regression that broke hyphenation. Also add more language patterns for hyphenation"
- title: "SONY driver: Fix cover thumbnails being uploaded to wrong directory on windows"
- title: "Fix UnicodeDecodeError when displaying a failed metadata fetch message"
tickets: [7560]
- title: "Bulk metadata edit: Speed up remove all tags operation"
- title: "MOBI Output: Specify image sizes in pixels instead of em to accomodate Amazon's @#$%#@! MOBI renderer"
- title: "Fix bug preventing customizing of builtin recipes if they are not ascii encoded"
- title: "SONY XML cache: Handle case where XML db contains reference to a file that does not exist gracefully"
improved recipes:
- Al Jazeera
- The Moscow Times
- Globe and Mail
- Washington Post
new recipes:
- title: "Hannoversche Allgemeine Zeitung"
author: "Artemis"
- title: "globes.co.il"
author: "marbs"
- title: "THN and RDS"
author: "Nexus"
- title: "pclab.pl"
author: "ravcio"
- title: "Now Toronto"
author: "Starson17"
- title: "Press releases of the German government and EU Commission"
author: "malfi"
- version: 0.7.28
date: 2010-11-12
new features:
- title: "Update the version of the grahical toolkit (Qt 4.7.1) used in the calibre binary builds on windows and linux. This should result in a significant speed up for the calibre ebook viewer"
- title: "Driver for Nook Color, Eken M001"
- title: "Add a tweak to turn off double clicking to open viewer"
- title: "Catalog generation: Add indication when a book has no formats"
tickets: [7376]
- title: "Advanced search dialog: Add a tab to allow searching particular metadata fields easily"
- title: "Conversion pipeline: When using the Level x Table of Contents expressions, if a tag is empty but has a non-empty title attribute, use that instead of ignoring the tag"
bug fixes:
- title: "Comic metadata reader: Sort filenames aplhabetically when choosing an image for the cover"
tickets: [7488]
- title: "Bulk convert dialog: Hide useless restore defaults button."
tickets: [7471]
- title: "Conversion pipeline: Handle input documents that encode null bytes as HTML entities correctly"
tickets: [7355]
- title: "Fix some SONY readers not being detected on windows"
tickets: [7413]
- title: "MOBI Input: Fix images missing when converting MOBI news downloads created with Mobipocket reader"
tickets: [7455]
- title: "ODT Input: Handle hyperlinks to headings that have truncated destination specifiers correctly"
tickets: [7506]
- title: "Sony driver: Ignore invalid strings when updating XML database"
- title: "Content Server: Add day to displayed date in /mobile book listing"
- title: "MOBI Input: Do not generate filenames with only extensions if the MOBI file has no internal name"
tickets: [7481]
- title: "MOBI Input: Handle files that has the record sizes set incorrectly to a long integer"
tickets: [7472]
- title: "Fix not enough vertical space for text in the preferences dialog category listing"
- title: "Remove 'sort' from Search and replace destination fields and add it to source fields. S&R is no longer marked experimental"
- title: "Edit metadata dialog: Save dialog geometry on reject as well as on accept"
- title: "E-book viewer: Fix clicking entries in TOC that point to the currently loaded flow not scrolling view to the top of the document"
- title: "Fix bug in regex used to extract charset from <meta> tags"
- title: "MOBI Output: Add support for the <q> tag"
improved recipes:
- Zeit Online
- Gamespot Review
- Politika
- Pagina12
- Irish Times
- elektrolese
new recipes:
- title: "Handelsblatt and European Voice"
author: "malfi"
- title: "Polityka and Newsweek"
author: "Mateusz Kielar"
- title: "MarcTV"
author: "Marc Toensings"
- title: "Rolling Stone"
author: "Darko Miletic"
- title: "Vedomosti"
author: "Nikolai Kotchetkov"
- title: "Hola.com"
author: "bmsleight"
- title: "Dnevnik, Siol.net, MMC-RTV and Avto-magazon"
author: "BlonG"
- title: "SC Print Magazine"
author: "Tony Maro"
- title: "Diario Sport"
author: "Jefferson Frantz"
- version: 0.7.27
date: 2010-11-05
@ -44,6 +214,7 @@
tickets: [7356]
- title: "News download: Workaround lack of thread safety in python mechanize, causing corrupted network packets (degrading network performance) on Ubuntu Maverick 64bit kernels"
tickets: [7321]
- title: "Convert comments to HTML for book details panel in separate thread to make scrolling through the book list faster when large comments are present"

View File

@ -12,8 +12,8 @@ p.title {
p.author {
margin-top:0em;
margin-bottom:0em;
text-align: left;
text-indent: 1em;
text-align: center;
text-indent: 0em;
font-size:large;
}
@ -27,17 +27,28 @@ p.author_index {
}
p.tags {
margin-top:0em;
margin-top:0.5em;
margin-bottom:0em;
text-align: left;
text-indent: 1em;
font-size:small;
text-indent: 0.0in;
}
p.description {
text-align:left;
font-style:normal;
p.formats {
font-size:90%;
margin-top:0em;
margin-bottom:0.5em;
text-align: left;
text-indent: 0.0in;
}
div.description > p:first-child {
margin: 0 0 0 0;
text-indent: 0em;
}
div.description {
margin: 0 0 0 0;
text-indent: 1em;
}
p.date_index {
@ -81,6 +92,14 @@ p.unread_book {
text-indent:-2em;
}
p.wishlist_item {
text-align:left;
margin-top:0px;
margin-bottom:0px;
margin-left:2em;
text-indent:-2em;
}
p.date_read {
text-align:left;
margin-top:0px;
@ -104,3 +123,14 @@ hr.annotations_divider {
margin-top:0em;
margin-bottom:0em;
}
td.publisher, td.date {
font-weight:bold;
text-align:center;
}
td.rating {
text-align: center;
}
td.thumbnail img {
-webkit-box-shadow: 4px 4px 12px #999;
}

View File

@ -355,6 +355,25 @@ h2.library_name {
color: red;
}
#booklist > #pagelist { display: none; }
#goto_page_dialog ul {
list-style-type: none;
font-size: medium;
}
#goto_page_dialog li {
margin-bottom: 1.5ex;
}
#goto_page_dialog a {
text-decoration: none;
color: blue;
}
#goto_page_dialog a:hover {
color: red;
}
#booklist .left .ui-button-text {
font-size: medium;

View File

@ -4,7 +4,7 @@
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>..:: calibre library ::.. {title}</title>
<title>..:: calibre {library} ::.. {title}</title>
<meta http-equiv="X-UA-Compatible" content="IE=100" />
<link rel="icon" type="image/x-icon" href="http://calibre-ebook.com/favicon.ico" />
@ -41,7 +41,7 @@
<div class="area">
<div class="bubble">
<p><a href="{prefix}/browse" title="Return to top level"
>&rarr;&nbsp;home&nbsp;&larr;</a></p>
>&rarr;&nbsp;{home}&nbsp;&larr;</a></p>
</div>
</div>
<div id="nav-container">&nbsp;
@ -80,7 +80,7 @@
<form name="search_form" action="{prefix}/browse/search" method="get" accept-charset="UTF-8">
<input value="{initial_search}" type="text" title="Search" name="query"
class="search_input" />&nbsp;
<input type="submit" value="Search" title="Search" alt="Search" />
<input type="submit" value="{Search}" title="{Search}" alt="{Search}" />
</form>
</div>
<div>&nbsp;</div>
@ -96,5 +96,6 @@
</div>
</div>
<div id="book_details_dialog"></div>
<div id="goto_page_dialog"></div>
</body>
</html>

View File

@ -202,6 +202,23 @@ function previous_page() {
else last_page();
}
function gp_internal(id) {
var gp = $('#goto_page_dialog');
gp.dialog('close');
var elem = $("#booklist #" + id);
load_page(elem);
}
function goto_page() {
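// copy the hidden page list into the dialog, size it to the window, then open it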
var gp = $('#goto_page_dialog');
var pl = $('#booklist > #pagelist');
gp.html(pl.html());
gp.dialog('option', 'title', pl.attr('title'));
gp.dialog('option', 'height', $(window).height() - 100);
gp.dialog('open');
}
function load_page(elem) {
if (elem.is(":visible")) return;
var ld = elem.find('.load_data');
@ -251,6 +268,12 @@ function booklist(hide_sort) {
modal: true,
show: 'slide'
});
$("#goto_page_dialog").dialog({
autoOpen: false,
modal: true,
show: 'slide'
});
first_page();
}

View File

@ -211,3 +211,9 @@ generate_cover_title_font = None
# Absolute path to a TTF font file to use as the font for the footer in the
# default cover
generate_cover_foot_font = None
# Behavior of doubleclick on the books list. Choices:
# open_viewer, do_nothing, edit_cell. Default: open_viewer.
# Example: doubleclick_on_library_view = 'do_nothing'
doubleclick_on_library_view = 'open_viewer'
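# For example, to make a double click edit the cell in place instead (one of
# the choices listed above):
# doubleclick_on_library_view = 'edit_cell'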

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.4 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 861 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 423 B

View File

@ -0,0 +1,50 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
180.com.uy
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Noticias(BasicNewsRecipe):
title = '180.com.uy'
__author__ = 'Gustavo Azambuja'
description = 'Noticias de Uruguay'
language = 'es'
timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False
recursion = 5
encoding = 'utf-8'
remove_javascript = True
no_stylesheets = True
oldest_article = 2
max_articles_per_feed = 100
keep_only_tags = [dict(name='div', attrs={'class':'tef-md tef-md-seccion-sociedad'})]
remove_tags = [
dict(name=['object','link'])
]
remove_attributes = ['width','height', 'style', 'font', 'color']
extra_css = '''
h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
p {font-family:Arial,Helvetica,sans-serif;}
'''
feeds = [
(u'Titulares', u'http://www.180.com.uy/feed.php')
]
def get_cover_url(self):
return 'http://www.180.com.uy/tplef/img/logo.gif'
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -1,10 +1,8 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
aljazeera.net
english.aljazeera.net
'''
from calibre.web.feeds.news import BasicNewsRecipe
@ -13,40 +11,58 @@ class AlJazeera(BasicNewsRecipe):
__author__ = 'Darko Miletic'
description = 'News from Middle East'
language = 'en'
publisher = 'Al Jazeera'
category = 'news, politics, middle east'
simultaneous_downloads = 1
delay = 4
oldest_article = 1
delay = 1
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
encoding = 'iso-8859-1'
remove_javascript = True
use_embedded_content = False
extra_css = """
body{font-family: Arial,sans-serif}
#ctl00_cphBody_dvSummary{font-weight: bold}
#dvArticleDate{font-size: small; color: #999999}
"""
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
html2lrf_options = [
'--comment', description
, '--category', category
, '--publisher', publisher
, '--ignore-tables'
keep_only_tags = [
dict(attrs={'id':['DetailedTitle','ctl00_cphBody_dvSummary','dvArticleDate']})
,dict(name='td',attrs={'class':'DetailedSummary'})
]
html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"\nlinearize_table=True'
keep_only_tags = [dict(name='div', attrs={'id':'ctl00_divContent'})]
remove_tags = [
dict(name=['object','link'])
dict(name=['object','link','table','meta','base','iframe','embed'])
,dict(name='td', attrs={'class':['MostActiveDescHeader','MostActiveDescBody']})
]
feeds = [(u'AL JAZEERA ENGLISH (AJE)', u'http://english.aljazeera.net/Services/Rss/?PostingId=2007731105943979989' )]
def get_article_url(self, article):
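# the feed occasionally returns links with a doubled slash after the host; collapse it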
artlurl = article.get('link', None)
return artlurl.replace('http://english.aljazeera.net//','http://english.aljazeera.net/')
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll(face=True):
del item['face']
td = soup.find('td',attrs={'class':'DetailedSummary'})
if td:
td.name = 'div'
spn = soup.find('span',attrs={'id':'DetailedTitle'})
if spn:
spn.name='h1'
for itm in soup.findAll('span', attrs={'id':['dvArticleDate','ctl00_cphBody_lblDate']}):
itm.name = 'div'
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return soup

View File

@ -0,0 +1,47 @@
__license__ = 'GPL v3'
__copyright__ = '2010, BlonG'
'''
avto-magazin.si
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Dnevnik(BasicNewsRecipe):
title = u'Avto Magazin'
__author__ = u'BlonG'
description = u'Za avtomobilisti\u010dne frike, poznavalce in nedeljske \u0161oferje.'
oldest_article = 7
max_articles_per_feed = 20
no_stylesheets = True
use_embedded_content = False
language = 'sl'
conversion_options = {'linearize_tables' : True}
cover_url = 'https://sites.google.com/site/javno2010/home/avto_magazin_cover.jpg'
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
keep_only_tags = [
dict(name='div', attrs={'id':'_iprom_inStream'}),
# dict(name='div', attrs={'class':'entry-content'}),
]
remove_tags = [
dict(name='div', attrs={'id':'voteConfirmation'}),
dict(name='div', attrs={'id':'InsideVote'}),
dict(name='div', attrs={'class':'Zone234'}),
dict(name='div', attrs={'class':'Comments'}),
dict(name='div', attrs={'class':'sorodneNovice'}),
dict(name='div', attrs={'id':'footer'}),
]
feeds = [
(u'Novice', u'http://www.avto-magazin.si/rss/')
]

View File

@ -0,0 +1,58 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
bitacora.com.uy
'''
from calibre.web.feeds.news import BasicNewsRecipe
class General(BasicNewsRecipe):
title = 'bitacora.com.uy'
__author__ = 'Gustavo Azambuja'
description = 'Noticias de Uruguay'
language = 'es'
timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False
recursion = 5
encoding = 'iso-8859-1'
remove_javascript = True
no_stylesheets = True
oldest_article = 2
max_articles_per_feed = 100
keep_only_tags = [dict(id=['txt'])]
remove_tags = [
dict(name='div', attrs={'class':'tablafoot'}),
dict(name=['object','h4']),
dict(name=['object','link'])
]
remove_attributes = ['width','height', 'style', 'font', 'color']
extra_css = '''
h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
p {font-family:Arial,Helvetica,sans-serif;}
'''
feeds = [
(u'Titulares', u'http://www.bitacora.com.uy/anxml.cgi?15')
]
def get_cover_url(self):
cover_url = None
index = 'http://www.bitacora.com.uy'
soup = self.index_to_soup(index)
link_item = soup.find('img',attrs={'class':'imgtapa'})
if link_item:
cover_url = "http://www.bitacora.com.uy/"+link_item['src']
return cover_url
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -1,18 +1,22 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# -*- coding: utf-8; mode: python -*-
# Find the newest version of this recipe here:
# https://github.com/consti/BrandEins-Recipe/raw/master/brandeins.recipe
__license__ = 'GPL v3'
__copyright__ = '2010, Constantin Hofstetter <consti at consti.de>'
__version__ = '0.95'
__copyright__ = '2010, Constantin Hofstetter <consti at consti.de>, Steffen Siebert <calibre at steffensiebert.de>'
__version__ = '0.96'
''' http://brandeins.de - Wirtschaftsmagazin '''
import re
import string
from calibre.web.feeds.recipes import BasicNewsRecipe
class BrandEins(BasicNewsRecipe):
title = u'Brand Eins'
title = u'brand eins'
__author__ = 'Constantin Hofstetter'
description = u'Wirtschaftsmagazin'
publisher ='brandeins.de'
@ -22,11 +26,14 @@ class BrandEins(BasicNewsRecipe):
no_stylesheets = True
encoding = 'utf-8'
language = 'de'
publication_type = 'magazine'
needs_subscription = True
# 2 is the last full magazine (default)
# 1 is the newest (but not full)
# 3 is one before 2 etc.
which_ausgabe = 2
# This value can be set via the username field.
default_issue = 2
keep_only_tags = [dict(name='div', attrs={'id':'theContent'}), dict(name='div', attrs={'id':'sidebar'}), dict(name='div', attrs={'class':'intro'}), dict(name='p', attrs={'class':'bodytext'}), dict(name='div', attrs={'class':'single_image'})]
@ -61,17 +68,31 @@ class BrandEins(BasicNewsRecipe):
return soup
def get_cover(self, soup):
cover_url = None
cover_item = soup.find('div', attrs = {'class': 'cover_image'})
if cover_item:
cover_url = 'http://www.brandeins.de/' + cover_item.img['src']
return cover_url
def parse_index(self):
feeds = []
archive = "http://www.brandeins.de/archiv.html"
issue = self.default_issue
if self.username:
try:
issue = int(self.username)
except:
pass
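# a non-numeric username (i.e. a normal login name) simply keeps the default issue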
soup = self.index_to_soup(archive)
latest_jahrgang = soup.findAll('div', attrs={'class': re.compile(r'\bjahrgang-latest\b') })[0].findAll('ul')[0]
pre_latest_issue = latest_jahrgang.findAll('a')[len(latest_jahrgang.findAll('a'))-self.which_ausgabe]
pre_latest_issue = latest_jahrgang.findAll('a')[len(latest_jahrgang.findAll('a'))-issue]
url = pre_latest_issue.get('href', False)
# Get the title for the magazin - build it out of the title of the cover - take the issue and year;
self.title = "Brand Eins "+ re.search(r"(?P<date>\d\d\/\d\d\d\d+)", pre_latest_issue.find('img').get('title', False)).group('date')
# Get month and year of the magazine issue - build it out of the title of the cover
self.timefmt = " " + re.search(r"(?P<date>\d\d\/\d\d\d\d)", pre_latest_issue.find('img').get('title', False)).group('date')
url = 'http://brandeins.de/'+url
# url = "http://www.brandeins.de/archiv/magazin/tierisch.html"
@ -83,6 +104,7 @@ class BrandEins(BasicNewsRecipe):
def brand_eins_parse_latest_issue(self, url):
soup = self.index_to_soup(url)
self.cover_url = self.get_cover(soup)
article_lists = [soup.find('div', attrs={'class':'subColumnLeft articleList'}), soup.find('div', attrs={'class':'subColumnRight articleList'})]
titles_and_articles = []
@ -123,3 +145,4 @@ class BrandEins(BasicNewsRecipe):
current_articles.append({'title': title, 'url': url, 'description': description, 'date':''})
titles_and_articles.append([chapter_title, current_articles])
return titles_and_articles

View File

@ -1,9 +1,11 @@
import re;
import re
from calibre.web.feeds.news import BasicNewsRecipe
class CNetJapan(BasicNewsRecipe):
title = u'CNET Japan'
oldest_article = 3
max_articles_per_feed = 30
__author__ = 'Hiroshi Miura'
feeds = [(u'cnet rss', u'http://feeds.japan.cnet.com/cnet/rss')]
language = 'ja'

View File

@ -0,0 +1,69 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
cosmohispano.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class General(BasicNewsRecipe):
title = 'Cosmopolitan'
__author__ = 'Gustavo Azambuja'
description = 'Revista Cosmopolitan, Edicion Espanola'
language = 'es'
timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False
recursion = 1
encoding = 'utf8'
remove_javascript = True
no_stylesheets = True
conversion_options = {'linearize_tables': True}
oldest_article = 180
max_articles_per_feed = 100
keep_only_tags = [
dict(id=['contenido']),
dict(name='td', attrs={'class':['contentheading', 'txt_articulo']})
]
remove_tags = [
dict(name='div', attrs={'class':['breadcrumb', 'bloque1', 'article', 'bajo_title', 'tags_articles', 'otrosenlaces_title', 'otrosenlaces_parent', 'compartir']}),
dict(name='div', attrs={'id':'comment'}),
dict(name='table', attrs={'class':'pagenav'}),
dict(name=['object','link'])
]
remove_attributes = ['width','height', 'style', 'font', 'color']
extra_css = '''
h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
img {float:left; clear:both; margin:10px}
p {font-family:Arial,Helvetica,sans-serif;}
'''
feeds = [
(u'Articulos', u'http://feeds.feedburner.com/cosmohispano/FSSt')
]
def preprocess_html(self, soup):
attribs = [ 'style','font','valign'
,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
return soup
def get_cover_url(self):
index = 'http://www.cosmohispano.com/revista'
soup = self.index_to_soup(index)
link_item = soup.find('img',attrs={'class':'img_portada'})
if link_item:
cover_url = "http://www.cosmohispano.com"+link_item['src']
return cover_url

View File

@ -25,7 +25,7 @@ class Danas(BasicNewsRecipe):
remove_empty_feeds = True
extra_css = """ @font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
.article_description,body,.lokacija{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif}
.article,.articledescription,body,.lokacija,.feed{font-family: Tahoma,Arial,Helvetica,sans1,sans-serif}
.nadNaslov,h1,.preamble{font-family: Georgia,"Times New Roman",Times,serif1,serif}
.antrfileText{border-left: 2px solid #999999;
margin-left: 0.8em;
@ -66,7 +66,7 @@ class Danas(BasicNewsRecipe):
keep_only_tags = [dict(name='div', attrs={'id':'left'})]
remove_tags = [
dict(name='div', attrs={'class':['width_1_4','metaClanka','baner']})
dict(name='div', attrs={'class':['width_1_4','metaClanka','baner','listaVesti','article_nav']})
,dict(name='div', attrs={'id':'comments'})
,dict(name=['object','link','iframe','meta'])
]

View File

@ -0,0 +1,61 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class deredactie(BasicNewsRecipe):
title = u'Deredactie.be'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
cover_url = 'http://www.deredactie.be/polopoly_fs/1.510827!image/2710428628.gif'
language = 'de'
keep_only_tags = []
__author__ = 'malfi'
keep_only_tags.append(dict(name = 'div', attrs = {'id': 'articlehead'}))
keep_only_tags.append(dict(name = 'div', attrs = {'id': 'articlebody'}))
remove_tags = []
remove_tags.append(dict(name = 'div', attrs = {'id': 'story'}))
remove_tags.append(dict(name = 'div', attrs = {'id': 'useractions'}))
remove_tags.append(dict(name = 'hr'))
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
def parse_index(self):
categories = []
catnames = {}
soup = self.index_to_soup('http://www.deredactie.be/cm/vrtnieuws.deutsch')
for elem in soup.findAll('li', attrs={'id' : re.compile("^navItem[2-9]") }):
a = elem.find('a', href=True)
m = re.search('(?<=/)[^/]*$', a['href'])
cat = str(m.group(0))
categories.append(cat)
catnames[cat] = a['title']
self.log("found cat %s\n" % catnames[cat])
feeds = []
for cat in categories:
articles = []
soup = self.index_to_soup('http://www.deredactie.be/cm/vrtnieuws.deutsch/'+cat)
for a in soup.findAll('a',attrs={'href' : re.compile("deutsch.*/[0-9][0-9][0-9][0-9][0-9][0-9]_")}):
skip_this_article = False
url = a['href'].strip()
if url.startswith('/'):
url = 'http://www.deredactie.be' + url
myarticle=({'title':self.tag_to_string(a), 'url':url, 'description':'', 'date':''})
for article in articles :
if article['url'] == url :
skip_this_article = True
self.log("SKIPPING DUP %s" % url)
break
if skip_this_article :
continue
articles.append(myarticle)
self.log("Adding URL %s\n" %url)
if articles:
feeds.append((catnames[cat], articles))
return feeds

View File

@ -0,0 +1,42 @@
from calibre.web.feeds.news import BasicNewsRecipe
class DiarioSport(BasicNewsRecipe):
title = u'Diario Sport'
oldest_article = 2
max_articles_per_feed = 75
__author__ = 'Jefferson Frantz'
description = 'Todas las noticias del Barça y del mundo del deporte en general'
timefmt = ' [%d %b, %Y]'
language = 'es'
no_stylesheets = True
feeds = [(u'Sport', u'http://feeds.feedburner.com/sport/ultimahora')]
extra_css = '''
h2{font-family: serif; font-size: small; font-weight: bold; color: #000000; text-align: justify}
'''
keep_only_tags = [dict(name='div', attrs={'id':['noticiasMedio']})]
remove_tags = [
dict(name=['object','link','script','ul'])
,dict(name='div', attrs={'id':['scrAdSense','herramientas2','participacion','participacion2','bloque1resultados','bloque2resultados','cont_vinyetesAnt','tinta','noticiasSuperior','cintillopublicidad2']})
,dict(name='p', attrs={'class':['masinformacion','hora']})
,dict(name='a', attrs={'class':["'link'"]})
,dict(name='div', attrs={'class':['addthis_toolbox addthis_default_style','firma','pretitularnoticia']})
,dict(name='form', attrs={'id':['formularioDeBusquedaAvanzada']})
]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup
def postprocess_html(self, soup, first_fetch):
img = soup.find('img',src='/img/videos/mascaravideo.png')
if not img is None:
img.extract()
return soup

View File

@ -0,0 +1,63 @@
__license__ = 'GPL v3'
__copyright__ = '2010, BlonG'
'''
dnevnik.si
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Dnevnik(BasicNewsRecipe):
title = u'Dnevnik.si'
__author__ = u'BlonG'
description = u'''Dnevnik je \u010dasnik z ve\u010d kot polstoletno zgodovino.
Pod sloganom \xbb\u017divljenje ima besedo\xab na svojih straneh prina\u0161a
bralcem bogastvo informacij, komentarjev in kolumen in raznovrstnost
pogledov, zaznamovanih z odgovornostjo do posameznika in \u0161ir\u0161e
dru\u017ebe.'''
oldest_article = 3
max_articles_per_feed = 20
language = 'sl'
no_stylesheets = True
use_embedded_content = False
cover_url = 'https://sites.google.com/site/javno2010/home/dnevnik_cover.jpg'
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
keep_only_tags = [
dict(name='div', attrs={'id':'_iprom_inStream'}),
dict(name='div', attrs={'class':'entry-content'}),
]
remove_tags = [
dict(name='div', attrs={'class':'fb_article_top'}),
dict(name='div', attrs={'class':'related'}),
dict(name='div', attrs={'class':'fb_article_foot'}),
dict(name='div', attrs={'class':'spreading'}),
dict(name='dl', attrs={'class':'ad'}),
dict(name='p', attrs={'class':'report'}),
dict(name='div', attrs={'class':'hfeed comments'}),
dict(name='dl', attrs={'id':'entryPanel'}),
dict(name='dl', attrs={'class':'infopush ip_wide'}),
dict(name='div', attrs={'class':'sidebar'}),
dict(name='dl', attrs={'class':'bottom'}),
dict(name='div', attrs={'id':'footer'}),
]
feeds = [
(u'Slovenija', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=13')
,(u'Svet', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=14')
,(u'EU', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=116')
,(u'Poslovni dnevnik', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=5')
,(u'Kronika', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=15')
,(u'Kultura', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=17')
,(u'Zdravje', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=18')
,(u'Znanost in IT', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=19')
,(u'(Ne)verjetno', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=20')
,(u'E-strada', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=21')
,(u'Svet vozil', u'http://www.dnevnik.si/rss/?articleType=1&articleSection=22')
]

View File

@ -0,0 +1,67 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
http://www.elpais.com.uy/
'''
from calibre.web.feeds.news import BasicNewsRecipe
class General(BasicNewsRecipe):
title = 'Diario El Pais'
__author__ = 'Gustavo Azambuja'
description = 'Noticias | Uruguay'
language = 'es'
timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False
recursion = 2
encoding = 'iso-8859-1'
remove_javascript = True
no_stylesheets = True
oldest_article = 2
max_articles_per_feed = 100
keep_only_tags = [
dict(name='h1'),
dict(name='div', attrs={'id':'Contenido'})
]
remove_tags = [
dict(name='div', attrs={'class':['date_text', 'comments', 'form_section', 'share_it']}),
dict(name='div', attrs={'id':['relatedPosts', 'spacer', 'banner_izquierda', 'right_container']}),
dict(name='p', attrs={'class':'FacebookLikeButton'}),
dict(name=['object','form']),
dict(name=['object','table']) ]
extra_css = '''
h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
p {font-family:Arial,Helvetica,sans-serif;}
'''
feeds = [
(u'Ultimo Momento', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=umomento'),
(u'Editorial', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=editorial'),
(u'Nacional', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=nacional'),
(u'Internacional', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=internacional'),
(u'Espectaculos', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=espectaculos'),
(u'Deportes', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=deportes'),
(u'Ciudades', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=ciudades'),
(u'Economia', u'http://www.elpais.com.uy/formatos/rss/index.asp?seccion=economia')
]
def get_cover_url(self):
cover_url = None
index = 'http://www.elpais.com.uy'
soup = self.index_to_soup(index)
link_item = soup.find('div',attrs={'class':'boxmedio box257'})
print link_item
if link_item:
cover_url = 'http://www.elpais.com.uy'+link_item.img['src']
return cover_url
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -10,6 +10,8 @@ from calibre.web.feeds.news import BasicNewsRecipe
class EndgadgetJapan(BasicNewsRecipe):
title = u'Engadget\u65e5\u672c\u7248'
language = 'ja'
__author__ = 'Hiroshi Miura'
cover_url = 'http://skins18.wincustomize.com/1/49/149320/29/7578/preview-29-7578.jpg'
masthead_url = 'http://www.blogsmithmedia.com/japanese.engadget.com/media/eng-jp-logo-t.png'
oldest_article = 7

View File

@ -0,0 +1,58 @@
from calibre.web.feeds.news import BasicNewsRecipe
LANGUAGE = 'de'
def feedlink(num):
return u'http://europa.eu/rapid/syndication/QuickRSSAction.do?id='+\
str(num)+'&lang='+ LANGUAGE
class EUCommissionPress(BasicNewsRecipe):
title = u'Pressemitteilungen der EU Kommission pro Politikbereich'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
cover_url = 'http://ec.europa.eu/wel/template_2007/images/banners/banner-background.jpg'
__author__ = 'malfi'
language = LANGUAGE
keep_only_tags = []
keep_only_tags.append(dict(name = 'div', attrs = {'class': 'pressReleaseContentMain'}))
remove_tags = []
feeds = [
(u'Pressemitteilung des Tages',feedlink(64)),
(u'Presidency',feedlink(137)),
(u'Foreign affairs and security policy',feedlink(138)),
(u'Agriculture and rural development',feedlink(139)),
(u'Budget and financial programming ',feedlink(140)),
(u'Climate action',feedlink(141)),
(u'Competition',feedlink(142)),
(u'Development',feedlink(143)),
(u'Digital agenda',feedlink(144)),
(u'Economic and monetary affairs',feedlink(145)),
(u'Education, culture, multilingualism and youth ',feedlink(146)),
(u'Employment, social Affairs and inclusion ',feedlink(147)),
(u'Energy',feedlink(148)),
(u'Enlargement and European neighbourhood policy ',feedlink(149)),
(u'Environment',feedlink(150)),
(u'Health and consumer policy',feedlink(151)),
(u'Home affairs',feedlink(152)),
(u'Industry and entrepreneurship',feedlink(153)),
(u'Inter-Institutional relations and administration',feedlink(154)),
(u'Internal market and services',feedlink(155)),
(u'International cooperation, humanitarian aid and crisis response',feedlink(156)),
(u'Justice, fundamental rights and citizenship',feedlink(157)),
(u'Maritime affairs and fisheries',feedlink(158)),
(u'Regional policy',feedlink(159)),
(u'Research and innovation',feedlink(160)),
(u'Taxation and customs union, audit and anti-fraud',feedlink(161)),
(u'Trade',feedlink(162)),
(u'Transport',feedlink(163))
]
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''

View File

@ -0,0 +1,51 @@
from calibre.web.feeds.news import BasicNewsRecipe
class EuropeanVoice(BasicNewsRecipe):
title = u'European Voice'
__author__ = 'malfi'
oldest_article = 14
max_articles_per_feed = 100
no_stylesheets = True
cover_url = 'http://www.europeanvoice.com/Css/images/logo.gif'
language = 'en'
keep_only_tags = [dict(name='div', attrs={'id':'articleLeftColumn'})]
remove_tags = [dict(name='div', attrs={'id':'BreadCrump'})]
feeds = [
(u'Whole site ',u'http://www.europeanvoice.com/Rss/2.xml'),
(u'News and analysis',u'http://www.europeanvoice.com/Rss/6.xml'),
(u'Comment',u'http://www.europeanvoice.com/Rss/7.xml'),
(u'Special reports',u'http://www.europeanvoice.com/Rss/5.xml'),
(u'People',u'http://www.europeanvoice.com/Rss/8.xml'),
(u'Career',u'http://www.europeanvoice.com/Rss/11.xml'),
(u'Policies',u'http://www.europeanvoice.com/Rss/4.xml'),
(u'Events',u'http://www.europeanvoice.com/Rss/10.xml'),
(u'Policies - Economics',u'http://www.europeanvoice.com/Rss/31.xml'),
(u'Policies - Business',u'http://www.europeanvoice.com/Rss/19.xml'),
(u'Policies - Trade',u'http://www.europeanvoice.com/Rss/25.xml'),
(u'Policies - Information society',u'http://www.europeanvoice.com/Rss/20.xml'),
(u'Policies - Energy',u'http://www.europeanvoice.com/Rss/15.xml'),
(u'Policies - Transport',u'http://www.europeanvoice.com/Rss/18.xml'),
(u'Policies - Climate change',u'http://www.europeanvoice.com/Rss/16.xml'),
(u'Policies - Environment',u'http://www.europeanvoice.com/Rss/17.xml'),
(u'Policies - Farming & food',u'http://www.europeanvoice.com/Rss/23.xml'),
(u'Policies - Health & society',u'http://www.europeanvoice.com/Rss/24.xml'),
(u'Policies - Justice',u'http://www.europeanvoice.com/Rss/29.xml'),
(u'Policies - Foreign affairs',u'http://www.europeanvoice.com/Rss/27.xml')
]
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
def print_version(self, url):
return url + '?bPrint=1'
def preprocess_html(self, soup):
denied = soup.findAll(True,text='Subscribers')
if denied:
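# raising skips just this subscriber-only article; the rest of the feed continues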
raise Exception('Article skipped, because content can only be seen with subscription')
return soup

View File

@ -0,0 +1,100 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
http://freeway.com.uy
'''
from calibre.web.feeds.news import BasicNewsRecipe
class General(BasicNewsRecipe):
title = 'freeway.com.uy'
__author__ = 'Gustavo Azambuja'
description = 'Revista Freeway, Montevideo, Uruguay'
language = 'es'
timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False
recursion = 1
encoding = 'utf8'
remove_javascript = True
no_stylesheets = True
conversion_options = {'linearize_tables': True}
oldest_article = 180
max_articles_per_feed = 100
keep_only_tags = [
dict(id=['contenido']),
dict(name='a', attrs={'class':'titulo_art_ppal'}),
dict(name='img', attrs={'class':'recuadro'}),
dict(name='td', attrs={'class':'txt_art_ppal'})
]
remove_tags = [
dict(name=['object','link'])
]
remove_attributes = ['width','height', 'style', 'font', 'color']
extra_css = '''
h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
img {float:left; clear:both; margin:10px}
p {font-family:Arial,Helvetica,sans-serif;}
'''
def parse_index(self):
feeds = []
for title, url in [('Articulos', 'http://freeway.com.uy/revista/')]:
articles = self.art_parse_section(url)
if articles:
feeds.append((title, articles))
return feeds
def art_parse_section(self, url):
soup = self.index_to_soup(url)
div = soup.find(attrs={'id': 'tbl_1'})
current_articles = []
for tag in div.findAllNext(attrs = {'class': 'ancho_articulos'}):
if tag.get('class') == 'link-list-heading':
break
for td in tag.findAll('td'):
a = td.find('a', attrs= {'class': 'titulo_articulos'})
if a is None:
continue
title = self.tag_to_string(a)
url = a.get('href', False)
if not url or not title:
continue
if url.startswith('/'):
url = 'http://freeway.com.uy'+url
p = td.find('p', attrs= {'class': 'txt_articulos'})
description = self.tag_to_string(p)
self.log('\t\tFound article:', title)
self.log('\t\t\t', url)
self.log('\t\t\t', description)
current_articles.append({'title': title, 'url': url, 'description':description, 'date':''})
return current_articles
def preprocess_html(self, soup):
attribs = [ 'style','font','valign'
,'colspan','width','height'
,'rowspan','summary','align'
,'cellspacing','cellpadding'
,'frames','rules','border'
]
for item in soup.body.findAll(name=['table','td','tr','th','caption','thead','tfoot','tbody','colgroup','col']):
item.name = 'div'
for attrib in attribs:
if item.has_key(attrib):
del item[attrib]
return soup
def get_cover_url(self):
#index = 'http://www.cosmohispano.com/revista'
#soup = self.index_to_soup(index)
#link_item = soup.find('img',attrs={'class':'img_portada'})
#if link_item:
# cover_url = "http://www.cosmohispano.com"+link_item['src']
return 'http://freeway.com.uy/_upload/_n_foto_grande/noticia_1792_tapanoviembre2010.jpg'

View File

@ -1,5 +1,5 @@
__license__ = 'GPL v3'
__author__ = u'Marc T\xf6nsing'
__author__ = u'Marc Toensing'
from calibre.web.feeds.news import BasicNewsRecipe
@ -17,6 +17,7 @@ class GamespotCom(BasicNewsRecipe):
no_javascript = True
feeds = [
('All Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5'),
('PC Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=5'),
('XBOX 360 Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1029'),
('Wii Reviews', 'http://www.gamespot.com/rss/game_updates.php?type=5&platform=1031'),
@ -37,5 +38,3 @@ class GamespotCom(BasicNewsRecipe):
def get_article_url(self, article):
return article.get('link') + '?print=1'

View File

@ -0,0 +1,28 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class GermanGovernmentPress(BasicNewsRecipe):
title = u'Pressemitteilungen der Bundesregierung'
oldest_article = 14
__author__ = 'malfi'
max_articles_per_feed = 100
no_stylesheets = True
cover_url = 'http://www.bundesregierung.de/static/images/logoBR.gif'
language = 'de'
keep_only_tags = []
keep_only_tags.append(dict(name = 'h2'))
keep_only_tags.append(dict(name = 'div', attrs = {'class': 'textblack'}))
keep_only_tags.append(dict(name = 'div', attrs = {'class': 'subtitle'}))
keep_only_tags.append(dict(name = 'div', attrs = {'class': 'text'}))
remove_tags = []
feeds = [ (u'Pressemitteilungen',u'http://www.bundesregierung.de/Webs/Breg/DE/Service/RSS/Functions/bundesregierungPressemitteilungenRSS20,templateId=renderNewsfeed.rdf') ]
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
def print_version(self, url):
m = re.search(r'^(.*)\.html$', url)
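# swap the trailing '.html' for the site's print-view variant (Druckansicht = print view)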
return str(m.group(1)) + ',layoutVariant=Druckansicht.html'

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__copyright__ = '2010, Szing'
__docformat__ = 'restructuredtext en'
'''
@ -10,49 +10,52 @@ globeandmail.com
from calibre.web.feeds.news import BasicNewsRecipe
class GlobeAndMail(BasicNewsRecipe):
title = u'Globe and Mail'
language = 'en_CA'
__author__ = 'Kovid Goyal'
class AdvancedUserRecipe1287083651(BasicNewsRecipe):
title = u'Globe & Mail'
__license__ = 'GPL v3'
__author__ = 'Szing'
oldest_article = 2
max_articles_per_feed = 10
no_stylesheets = True
extra_css = '''
h3 {font-size: 22pt; font-weight:bold; margin:0px; padding:0px 0px 8pt 0px;}
h4 {margin-top: 0px;}
#byline { font-family: monospace; font-weight:bold; }
#placeline {font-weight:bold;}
#credit {margin-top:0px;}
.tag {font-size: 22pt;}'''
description = 'Canada\'s national newspaper'
keep_only_tags = [dict(name='article')]
remove_tags = [dict(name='aside'),
dict(name='footer'),
dict(name='div', attrs={'class':(lambda x: isinstance(x, (str,unicode)) and 'articlecommentcountholder' in x.split(' '))}),
dict(name='ul', attrs={'class':(lambda x: isinstance(x, (str,unicode)) and 'articletoolbar' in x.split(' '))}),
]
max_articles_per_feed = 100
encoding = 'utf8'
publisher = 'Globe & Mail'
language = 'en_CA'
extra_css = 'p.meta {font-size:75%}\n .redtext {color: red;}\n .byline {font-size: 70%}'
feeds = [
(u'Latest headlines', u'http://www.theglobeandmail.com/?service=rss'),
(u'Top stories', u'http://www.theglobeandmail.com/?service=rss&feed=topstories'),
(u'National', u'http://www.theglobeandmail.com/news/national/?service=rss'),
(u'Politics', u'http://www.theglobeandmail.com/news/politics/?service=rss'),
(u'World', u'http://www.theglobeandmail.com/news/world/?service=rss'),
(u'Top National Stories', u'http://www.theglobeandmail.com/news/national/?service=rss'),
(u'Business', u'http://www.theglobeandmail.com/report-on-business/?service=rss'),
(u'Opinions', u'http://www.theglobeandmail.com/news/opinions/?service=rss'),
(u'Columnists', u'http://www.theglobeandmail.com/news/opinions/columnists/?service=rss'),
(u'Globe Investor', u'http://www.theglobeandmail.com/globe-investor/?service=rss'),
(u'Sports', u'http://www.theglobeandmail.com/sports/?service=rss'),
(u'Commentary', u'http://www.theglobeandmail.com/report-on-business/commentary/?service=rss'),
(u'Blogs', u'http://www.theglobeandmail.com/blogs/?service=rss'),
(u'Facts & Arguments', u'http://www.theglobeandmail.com/life/facts-and-arguments/?service=rss'),
(u'Technology', u'http://www.theglobeandmail.com/news/technology/?service=rss'),
(u'Investing', u'http://www.theglobeandmail.com/globe-investor/?service=rss'),
(u'Top Political Stories', u'http://www.theglobeandmail.com/news/politics/?service=rss'),
(u'Arts', u'http://www.theglobeandmail.com/news/arts/?service=rss'),
(u'Life', u'http://www.theglobeandmail.com/life/?service=rss'),
(u'Blogs', u'http://www.theglobeandmail.com/blogs/?service=rss'),
(u'Real Estate', u'http://www.theglobeandmail.com/real-estate/?service=rss'),
(u'Auto', u'http://www.theglobeandmail.com/auto/?service=rss')
(u'Sports', u'http://www.theglobeandmail.com/sports/?service=rss'),
(u'Auto', u'http://www.theglobeandmail.com/auto/?service=rss')
]
def get_article_url(self, article):
url = BasicNewsRecipe.get_article_url(self, article)
if '/video/' not in url:
return url
keep_only_tags = [
dict(name='h1'),
dict(name='h2', attrs={'id':'articletitle'}),
dict(name='p', attrs={'class':['leadText', 'meta', 'leadImage', 'redtext byline', 'bodyText']}),
dict(name='div', attrs={'class':['news','articlemeta','articlecopy']}),
dict(id='article'),
dict(name='table', attrs={'class':'todays-market'}),
dict(name='header', attrs={'id':'leadheader'})
]
remove_tags = [
dict(name='div', attrs={'id':['tabInside', 'ShareArticles', 'topStories']})
]
#this has to be here or the text in the article appears twice.
remove_tags_after = [dict(id='article')]
#Use the mobile version rather than the web version
def print_version(self, url):
return url + '&service=mobile'

View File

@ -0,0 +1,47 @@
from calibre.web.feeds.news import BasicNewsRecipe
import re
class AdvancedUserRecipe1283848012(BasicNewsRecipe):
description = 'This is Globes.co.il.'
cover_url = 'http://www.the7eye.org.il/SiteCollectionImages/BAKTANA/arye_avnery_010709_377.jpg'
title = u'Globes'
language = 'he'
__author__ = 'marbs'
extra_css='img {max-width:100%;} body{direction: rtl;max-width:100%;}title{direction: rtl; } article_description{direction: rtl; }, a.article{direction: rtl;max-width:100%;} calibre_feed_description{direction: rtl; }'
simultaneous_downloads = 5
remove_javascript = True
timefmt = '[%a, %d %b, %Y]'
oldest_article = 1
max_articles_per_feed = 100
remove_attributes = ['width','style']
feeds = [(u'שוק ההון', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=585'),
(u'נדל"ן', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=607'),
(u'וול סטריט ושווקי העולם', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=1225'),
(u'ניתוח טכני', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=1294'),
(u'היי טק', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=594'),
(u'נתח שוק וצרכנות', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=821'),
(u'דין וחשבון', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=829'),
(u'רכב', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3220'),
(u'דעות', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=845'),
(u'קניון המניות - טור שבועי', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3175'),
(u'סביבה', u'http://www.globes.co.il/webservice/rss/rssfeeder.asmx/FeederNode?iID=3221')]
def print_version(self, url):
split1 = url.split("=")
print_url = 'http://www.globes.co.il/serve/globes/printwindow.asp?did=' + split1[1]
return print_url
def preprocess_html(self, soup):
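# the print page opens with a black banner row; drop it and the row just above it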
soup.find('tr',attrs={'bgcolor':'black'}).findPrevious('tr').extract()
soup.find('tr',attrs={'bgcolor':'black'}).extract()
return soup
def fixChars(self,string):
# Replace lsquo (\x91)
fixed = re.sub(u'\x91', u'\u2018', string)
return fixed

View File

@ -0,0 +1,41 @@
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Handelsblatt(BasicNewsRecipe):
title = u'Handelsblatt'
__author__ = 'malfi'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
cover_url = 'http://www.handelsblatt.com/images/logo/logo_handelsblatt.com.png'
language = 'de'
keep_only_tags = []
keep_only_tags.append(dict(name = 'div', attrs = {'class': 'structOneCol'}))
keep_only_tags.append(dict(name = 'div', attrs = {'id': 'fullText'}))
remove_tags = [dict(name='img', attrs = {'src': 'http://www.handelsblatt.com/images/icon/loading.gif'})]
feeds = [
(u'Handelsblatt Exklusiv',u'http://www.handelsblatt.com/rss/exklusiv'),
(u'Handelsblatt Top-Themen',u'http://www.handelsblatt.com/rss/top-themen'),
(u'Handelsblatt Schlagzeilen',u'http://www.handelsblatt.com/rss/ticker/'),
(u'Handelsblatt Finanzen',u'http://www.handelsblatt.com/rss/finanzen/'),
(u'Handelsblatt Unternehmen',u'http://www.handelsblatt.com/rss/unternehmen/'),
(u'Handelsblatt Politik',u'http://www.handelsblatt.com/rss/politik/'),
(u'Handelsblatt Technologie',u'http://www.handelsblatt.com/rss/technologie/'),
(u'Handelsblatt Meinung',u'http://www.handelsblatt.com/rss/meinung'),
(u'Handelsblatt Magazin',u'http://www.handelsblatt.com/rss/magazin/'),
(u'Handelsblatt Weblogs',u'http://www.handelsblatt.com/rss/blogs')
]
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
def print_version(self, url):
m = re.search('(?<=;)[0-9]*', url)
return u'http://www.handelsblatt.com/_b=' + str(m.group(0)) + ',_p=21,_t=ftprint,doc_page=0;printpage'

View File

@ -0,0 +1,38 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1287519083(BasicNewsRecipe):
title = u'Hannoversche Allgemeine Zeitung'
oldest_article = 1
__author__ = 'Artemis'
max_articles_per_feed = 30
language = 'de'
no_stylesheets = True
feeds = [
#(u'Schlagzeilen', u'http://www.haz.de/rss/feed/haz_schlagzeilen'),
(u'Politik', u'http://www.haz.de/rss/feed/haz_politik'),
(u'Wirtschaft', u'http://www.haz.de/rss/feed/haz_wirtschaft'),
(u'Panorama', u'http://www.haz.de/rss/feed/haz_panorama'),
(u'Wissen', u'http://www.haz.de/rss/feed/haz_wissen'),
(u'Kultur', u'http://www.haz.de/rss/feed/haz_kultur'),
(u'Sp\xe4tvorstellung', u'http://www.haz.de/rss/feed/haz_spaetvorstellung'),
(u'Hannover & Region', u'http://www.haz.de/rss/feed/haz_hannoverregion'),
(u'Netzgefl\xfcster', u'http://www.haz.de/rss/feed/haz_netzgefluester'),
(u'Meinung', u'http://www.haz.de/rss/feed/haz_meinung'),
(u'ZiSH', u'http://www.haz.de/rss/feed/haz_zish'),
(u'Medien', u'http://www.haz.de/rss/feed/haz_medien'),
#(u'Sport', u'http://www.haz.de/rss/feed/haz_sport'),
#(u'Hannover 96', u'http://www.haz.de/rss/feed/haz_hannover96')
]
remove_tags_before =dict(id='modul_artikel')
remove_tags_after =dict(id='articlecontent')
remove_tags = [
dict(id='articlesidebar'),
dict(name='div', attrs={'class':['articlecomment',
'articlebookmark', 'teaser_anzeige', 'teaser_umfrage',
'navigation', 'subnavigation']})
]

View File

@ -0,0 +1,38 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Brendan Sleight <bms.calibre at barwap.com>'
'''
hola.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Hola(BasicNewsRecipe):
title = u'Hola'
__author__ = 'bmsleight'
description = 'diario de actualidad, moda y belleza.'
oldest_article = 10
max_articles_per_feed = 100
no_stylesheets = True
language = 'es'
use_embedded_content = False
keep_only_tags = [
dict(name='div', attrs={'id':'cuerpo'})
]
feeds = [
(u'Famosos' , u'http://www.hola.com/famosos/rss.xml' ),
(u'Realeza' , u'http://www.hola.com/realeza/rss.xml' ),
(u'Cine' , u'http://www.hola.com/cine/rss.xml' ),
(u'Música' , u'http://www.hola.com/musica/rss.xml' ),
(u'Moda y modelos' , u'http://www.hola.com/moda/portada/rss.xml' ),
(u'Belleza y salud', u'http://www.hola.com/belleza/portada/rss.xml' ),
(u'Niños' , u'http://www.hola.com/ninos/rss.xml' ),
(u'Todas las noticias', u'http://int2.hola.com/app/feeds/rss_hola.php'),
]
def get_article_url(self, article):
url = article.get('guid', None)
return url

View File

@ -13,7 +13,6 @@ class IrishTimes(BasicNewsRecipe):
language = 'en_IE'
timefmt = ' (%A, %B %d, %Y)'
oldest_article = 3
no_stylesheets = True
simultaneous_downloads= 1
@ -33,10 +32,10 @@ class IrishTimes(BasicNewsRecipe):
('Letters', 'http://www.irishtimes.com/feeds/rss/newspaper/letters.rss'),
]
def print_version(self, url):
if url.count('rss.feedsportal.com'):
u = url.replace('0Bhtml/story01.htm','_pf0Bhtml/story01.htm')
u = 'http://www.irishtimes.com' + \
(((url[69:].replace('0C','/')).replace('0A','0'))).replace('0Bhtml/story01.htm','_pf.html')
else:
u = url.replace('.html','_pf.html')
return u
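# Worked example of the decoding above, assuming the usual feedsportal
# escaping where '0A' stands for '0', '0B' for '.' and '0C' for '/'
# (inferred from the replaces; the URL tail below is hypothetical):
#   url[69:]           = '0C20101124irish0Bhtml/story01.htm'
#   after the replaces = '/20101124irish_pf.html'
# which, prefixed with http://www.irishtimes.com, is the printer-friendly page.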

View File

@ -6,6 +6,8 @@ __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
www.jiji.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class JijiDotCom(BasicNewsRecipe):
title = u'\u6642\u4e8b\u901a\u4fe1'
__author__ = 'Hiroshi Miura'

View File

@ -0,0 +1,48 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
ladiaria.com.uy
'''
from calibre.web.feeds.news import BasicNewsRecipe
class General(BasicNewsRecipe):
title = 'La Diaria'
__author__ = 'Gustavo Azambuja'
description = 'Noticias de Uruguay'
language = 'es'
timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False
recursion = 5
encoding = 'utf8'
remove_javascript = True
no_stylesheets = True
oldest_article = 2
max_articles_per_feed = 100
keep_only_tags = [dict(id=['article'])]
remove_tags = [
dict(name='div', attrs={'class':['byline', 'hr', 'titlebar', 'volver-arriba-right']}),
dict(name='div', attrs={'id':'discussion'}),
dict(name=['object','link'])
]
extra_css = '''
h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
p {font-family:Arial,Helvetica,sans-serif;}
'''
feeds = [
(u'Articulos', u'http://ladiaria.com/feeds/articulos')
]
def get_cover_url(self):
return 'http://ladiaria.com/edicion/imagenportada/'
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -54,10 +54,7 @@ class LaJornada_mx(BasicNewsRecipe):
preprocess_regexps = [
(re.compile( r'<div class="inicial">(.*)</div><p class="s-s">'
,re.DOTALL|re.IGNORECASE)
,lambda match: '<p class="inicial">' + match.group(1) + '</p><p class="s-s">'),
(re.compile( r'<q>(.*?)</q>'
,re.DOTALL|re.IGNORECASE)
,lambda match: '"' + match.group(1) + '"')
,lambda match: '<p class="inicial">' + match.group(1) + '</p><p class="s-s">')
]
keep_only_tags = [

View File

@ -8,7 +8,7 @@ from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class LaRazon_Bol(BasicNewsRecipe):
title = 'La Razón - Bolivia'
title = u'La Razón - Bolivia'
__author__ = 'Darko Miletic'
description = 'El diario nacional de Bolivia'
publisher = 'Praxsis S.R.L.'

View File

@ -6,6 +6,8 @@ __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
www.mainichi.jp
'''
from calibre.web.feeds.news import BasicNewsRecipe
class MainichiDailyNews(BasicNewsRecipe):
title = u'\u6bce\u65e5\u65b0\u805e'
__author__ = 'Hiroshi Miura'

View File

@ -1,3 +1,5 @@
from calibre.web.feeds.news import BasicNewsRecipe
class MainichiDailyITNews(BasicNewsRecipe):
title = u'\u6bce\u65e5\u65b0\u805e(IT&\u5bb6\u96fb)'
__author__ = 'Hiroshi Miura'

View File

@ -0,0 +1,35 @@
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
Fetch MarcTV.
'''
from calibre.web.feeds.news import BasicNewsRecipe
class MarcTVde(BasicNewsRecipe):
title = 'Marc Toensings Visionen'
description = 'Marc Toensings Visionen'
language = 'de'
__author__ = 'Marc Toensing'
max_articles_per_feed = 40
oldest_article = 665
use_embedded_content = False
remove_tags = []
keep_only_tags = dict(name='div', attrs={'class':["content"]})
feeds = [(u'Spiele', u'http://feeds.feedburner.com/marctv/spiele'), (u'Leben', u'http://feeds.feedburner.com/marctv/leben'), (u'Medien', u'http://feeds.feedburner.com/marctv/medien')]
extra_css = '#wrapper .entry p img{width:620px; height: 270px;}'
def get_cover_url(self):
return 'http://marctv.de/marctv.png'

View File

@ -3,13 +3,28 @@ __copyright__ = '2010, Eddie Lau'
'''
modified from Singtao Toronto calibre recipe by rty
Change Log:
2010/11/22: add English section, remove eco-news section which is not updated daily, correct
ordering of articles
2010/11/12: add news image and eco-news section
2010/11/08: add parsing of finance section
2010/11/06: temporary work-around for Kindle device having no capability to display unicode
in section/article list.
2010/10/31: skip repeated articles in section pages
'''
import datetime
import os, datetime, re
from calibre.web.feeds.recipes import BasicNewsRecipe
from contextlib import nested
class AdvancedUserRecipe1278063072(BasicNewsRecipe):
from calibre import __appname__, strftime
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata import MetaInformation
from calibre.utils.date import now as nowf
class MPHKRecipe(BasicNewsRecipe):
title = 'Ming Pao - Hong Kong'
oldest_article = 1
max_articles_per_feed = 100
@ -24,27 +39,131 @@ class AdvancedUserRecipe1278063072(BasicNewsRecipe):
encoding = 'Big5-HKSCS'
recursions = 0
conversion_options = {'linearize_tables':True}
extra_css = 'img {display: block; margin-left: auto; margin-right: auto; margin-top: 10px; margin-bottom: 10px;}'
#extra_css = 'img {float:right; margin:4px;}'
masthead_url = 'http://news.mingpao.com/image/portals_top_logo_news.gif'
keep_only_tags = [dict(name='h1'),
#dict(name='font', attrs={'style':['font-size:14pt; line-height:160%;']}), # for entertainment page
dict(attrs={'class':['photo']}),
dict(attrs={'id':['newscontent']}),
dict(attrs={'id':['newscontent01','newscontent02']})]
remove_tags = [dict(name='style'),
dict(attrs={'id':['newscontent135']})] # for the finance page
remove_attributes = ['width']
preprocess_regexps = [
(re.compile(r'<h5>', re.DOTALL|re.IGNORECASE),
lambda match: '<h1>'),
(re.compile(r'</h5>', re.DOTALL|re.IGNORECASE),
lambda match: '</h1>'),
]
def image_url_processor(cls, baseurl, url):
# trick: break the url at the first occurrence of a digit and replace
# that digit with an additional '_'
# not working yet; may need to move this to the preprocess_html() method
#minIdx = 10000
#i0 = url.find('0')
#if i0 >= 0 and i0 < minIdx:
# minIdx = i0
#i1 = url.find('1')
#if i1 >= 0 and i1 < minIdx:
# minIdx = i1
#i2 = url.find('2')
#if i2 >= 0 and i2 < minIdx:
# minIdx = i2
#i3 = url.find('3')
#if i3 >= 0 and i3 < minIdx:
# minIdx = i3
#i4 = url.find('4')
#if i4 >= 0 and i4 < minIdx:
# minIdx = i4
#i5 = url.find('5')
#if i5 >= 0 and i5 < minIdx:
# minIdx = i5
#i6 = url.find('6')
#if i6 >= 0 and i6 < minIdx:
# minIdx = i6
#i7 = url.find('7')
#if i7 >= 0 and i7 < minIdx:
# minIdx = i7
#i8 = url.find('8')
#if i8 >= 0 and i8 < minIdx:
# minIdx = i8
#i9 = url.find('9')
#if i9 >= 0 and i9 < minIdx:
# minIdx = i9
#return url[0:minIdx] + '_' + url[minIdx+1:]
return url
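# A minimal sketch of the disabled digit scan above, assuming the same
# intent: locate the first digit with a regex and replace it with '_'.
# The method name is hypothetical and nothing calls it yet.
def break_url_at_first_digit(self, url):
    m = re.search(r'\d', url)  # re is imported at the top of this recipe
    if m is None:
        return url  # no digit in the url, leave it untouched
    # equivalent to url[0:minIdx] + '_' + url[minIdx+1:] in the scan above
    return url[:m.start()] + '_' + url[m.start()+1:]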
def get_fetchdate(self):
dt_utc = datetime.datetime.utcnow()
# convert UTC to local hk time - at around HKT 5.30am, all news are available
dt_local = dt_utc - datetime.timedelta(-2.5/24)
# convert UTC to local hk time - at around HKT 6.00am, all news are available
dt_local = dt_utc - datetime.timedelta(-2.0/24)
return dt_local.strftime("%Y%m%d")
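# Worked example of the offset above: subtracting a negative 2-hour
# timedelta shifts UTC forward, so the returned date rolls over at
# 22:00 UTC, i.e. 06:00 HKT (UTC+8), the hour by which, per the comment,
# all of the day's news should be available.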
def parse_index(self):
feeds = []
dateStr = self.get_fetchdate()
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'), (u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'), (u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'), (u'\u793e\u8a55\u2027\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm'), (u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'), (u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'), (u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm'), ('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'), (u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm'), (u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),]:
for title, url in [(u'\u8981\u805e Headline', 'http://news.mingpao.com/' + dateStr + '/gaindex.htm'),
(u'\u6559\u80b2 Education', 'http://news.mingpao.com/' + dateStr + '/gfindex.htm'),
(u'\u6e2f\u805e Local', 'http://news.mingpao.com/' + dateStr + '/gbindex.htm'),
(u'\u793e\u8a55\u2027\u7b46\u9663 Editorial', 'http://news.mingpao.com/' + dateStr + '/mrindex.htm'),
(u'\u8ad6\u58c7 Forum', 'http://news.mingpao.com/' + dateStr + '/faindex.htm'),
(u'\u4e2d\u570b China', 'http://news.mingpao.com/' + dateStr + '/caindex.htm'),
(u'\u570b\u969b World', 'http://news.mingpao.com/' + dateStr + '/taindex.htm'),
('Tech News', 'http://news.mingpao.com/' + dateStr + '/naindex.htm'),
(u'\u9ad4\u80b2 Sport', 'http://news.mingpao.com/' + dateStr + '/spindex.htm'),
(u'\u526f\u520a Supplement', 'http://news.mingpao.com/' + dateStr + '/jaindex.htm'),
(u'\u82f1\u6587 English', 'http://news.mingpao.com/' + dateStr + '/emindex.htm')]:
articles = self.parse_section(url)
if articles:
feeds.append((title, articles))
# special - finance
fin_articles = self.parse_fin_section('http://www.mpfinance.com/htm/Finance/' + dateStr + '/News/ea,eb,ecindex.htm')
if fin_articles:
feeds.append((u'\u7d93\u6fdf Finance', fin_articles))
# special - eco-friendly
# eco_articles = self.parse_eco_section('http://tssl.mingpao.com/htm/marketing/eco/cfm/Eco1.cfm')
# if eco_articles:
# feeds.append((u'\u74b0\u4fdd Eco News', eco_articles))
# special - entertainment
#ent_articles = self.parse_ent_section('http://ol.mingpao.com/cfm/star1.cfm')
#if ent_articles:
# feeds.append(('Entertainment', ent_articles))
return feeds
def parse_section(self, url):
dateStr = self.get_fetchdate()
soup = self.index_to_soup(url)
divs = soup.findAll(attrs={'class': ['bullet','bullet_grey']})
current_articles = []
included_urls = []
divs.reverse()
for i in divs:
a = i.find('a', href = True)
title = self.tag_to_string(a)
url = a.get('href', False)
url = 'http://news.mingpao.com/' + dateStr + '/' +url
if url not in included_urls and url.rfind('Redirect') == -1:
current_articles.append({'title': title, 'url': url, 'description':'', 'date':''})
included_urls.append(url)
current_articles.reverse()
return current_articles
def parse_fin_section(self, url):
dateStr = self.get_fetchdate()
soup = self.index_to_soup(url)
a = soup.findAll('a', href= True)
current_articles = []
for i in a:
url = i.get('href', False)
if not url.rfind(dateStr) == -1 and url.rfind('index') == -1:
title = self.tag_to_string(i)
url = 'http://www.mpfinance.com/cfm/' +url
current_articles.append({'title': title, 'url': url, 'description':''})
return current_articles
def parse_eco_section(self, url):
soup = self.index_to_soup(url)
divs = soup.findAll(attrs={'class': ['bullet']})
current_articles = []
@ -53,9 +172,162 @@ class AdvancedUserRecipe1278063072(BasicNewsRecipe):
a = i.find('a', href = True)
title = self.tag_to_string(a)
url = a.get('href', False)
url = 'http://news.mingpao.com/' + dateStr + '/' +url
if url not in included_urls:
url = 'http://tssl.mingpao.com/htm/marketing/eco/cfm/' +url
if url not in included_urls and url.rfind('Redirect') == -1:
current_articles.append({'title': title, 'url': url, 'description':''})
included_urls.append(url)
return current_articles
#def parse_ent_section(self, url):
# dateStr = self.get_fetchdate()
# soup = self.index_to_soup(url)
# a = soup.findAll('a', href=True)
# current_articles = []
# included_urls = []
# for i in a:
# title = self.tag_to_string(i)
# url = 'http://ol.mingpao.com/cfm/' + i.get('href', False)
# if url not in included_urls and not url.rfind('.txt') == -1 and not url.rfind(dateStr) == -1 and not title == '':
# current_articles.append({'title': title, 'url': url, 'description': ''})
# return current_articles
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll(width=True):
del item['width']
for item in soup.findAll(absmiddle=True):
del item['absmiddle']
return soup
def create_opf(self, feeds, dir=None):
#super(MPHKRecipe,self).create_opf(feeds, dir)
if dir is None:
dir = self.output_dir
title = self.short_title()
if self.output_profile.periodical_date_in_title:
title += strftime(self.timefmt)
mi = MetaInformation(title, [__appname__])
mi.publisher = __appname__
mi.author_sort = __appname__
mi.publication_type = self.publication_type+':'+self.short_title()
mi.timestamp = nowf()
mi.comments = self.description
if not isinstance(mi.comments, unicode):
mi.comments = mi.comments.decode('utf-8', 'replace')
mi.pubdate = nowf()
opf_path = os.path.join(dir, 'index.opf')
ncx_path = os.path.join(dir, 'index.ncx')
opf = OPFCreator(dir, mi)
# Add mastheadImage entry to <guide> section
mp = getattr(self, 'masthead_path', None)
if mp is not None and os.access(mp, os.R_OK):
from calibre.ebooks.metadata.opf2 import Guide
ref = Guide.Reference(os.path.basename(self.masthead_path), os.getcwdu())
ref.type = 'masthead'
ref.title = 'Masthead Image'
opf.guide.append(ref)
manifest = [os.path.join(dir, 'feed_%d'%i) for i in range(len(feeds))]
manifest.append(os.path.join(dir, 'index.html'))
manifest.append(os.path.join(dir, 'index.ncx'))
# Get cover
cpath = getattr(self, 'cover_path', None)
if cpath is None:
pf = open(os.path.join(dir, 'cover.jpg'), 'wb')
if self.default_cover(pf):
cpath = pf.name
if cpath is not None and os.access(cpath, os.R_OK):
opf.cover = cpath
manifest.append(cpath)
# Get masthead
mpath = getattr(self, 'masthead_path', None)
if mpath is not None and os.access(mpath, os.R_OK):
manifest.append(mpath)
opf.create_manifest_from_files_in(manifest)
for mani in opf.manifest:
if mani.path.endswith('.ncx'):
mani.id = 'ncx'
if mani.path.endswith('mastheadImage.jpg'):
mani.id = 'masthead-image'
entries = ['index.html']
toc = TOC(base_path=dir)
self.play_order_counter = 0
self.play_order_map = {}
def feed_index(num, parent):
f = feeds[num]
for j, a in enumerate(f):
if getattr(a, 'downloaded', False):
adir = 'feed_%d/article_%d/'%(num, j)
auth = a.author
if not auth:
auth = None
desc = a.text_summary
if not desc:
desc = None
else:
desc = self.description_limiter(desc)
entries.append('%sindex.html'%adir)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
parent.add_item('%sindex.html'%adir, None, a.title if a.title else _('Untitled Article'),
play_order=po, author=auth, description=desc)
last = os.path.join(self.output_dir, ('%sindex.html'%adir).replace('/', os.sep))
for sp in a.sub_pages:
prefix = os.path.commonprefix([opf_path, sp])
relp = sp[len(prefix):]
entries.append(relp.replace(os.sep, '/'))
last = sp
if os.path.exists(last):
with open(last, 'rb') as fi:
src = fi.read().decode('utf-8')
soup = BeautifulSoup(src)
body = soup.find('body')
if body is not None:
prefix = '/'.join('..' for i in range(2*len(re.findall(r'link\d+', last))))
templ = self.navbar.generate(True, num, j, len(f),
not self.has_single_feed,
a.orig_url, __appname__, prefix=prefix,
center=self.center_navbar)
elem = BeautifulSoup(templ.render(doctype='xhtml').decode('utf-8')).find('div')
body.insert(len(body.contents), elem)
with open(last, 'wb') as fi:
fi.write(unicode(soup).encode('utf-8'))
if len(feeds) == 0:
raise Exception('All feeds are empty, aborting.')
if len(feeds) > 1:
for i, f in enumerate(feeds):
entries.append('feed_%d/index.html'%i)
po = self.play_order_map.get(entries[-1], None)
if po is None:
self.play_order_counter += 1
po = self.play_order_counter
auth = getattr(f, 'author', None)
if not auth:
auth = None
desc = getattr(f, 'description', None)
if not desc:
desc = None
feed_index(i, toc.add_item('feed_%d/index.html'%i, None,
f.title, play_order=po, description=desc, author=auth))
else:
entries.append('feed_%d/index.html'%0)
feed_index(0, toc)
for i, p in enumerate(entries):
entries[i] = os.path.join(dir, p.replace('/', os.sep))
opf.create_spine(entries)
opf.set_toc(toc)
with nested(open(opf_path, 'wb'), open(ncx_path, 'wb')) as (opf_file, ncx_file):
opf.render(opf_file, ncx_file)
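# Note: create_opf() above appears to be adapted from BasicNewsRecipe's own
# implementation (see the commented-out super() call at its top); duplicating
# it here gives the recipe direct control over the generated metadata and the
# OPF/NCX files written into the output directory.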

View File

@ -0,0 +1,57 @@
__license__ = 'GPL v3'
__copyright__ = '2010, BlonG'
'''
www.rtvslo.si
'''
from calibre.web.feeds.news import BasicNewsRecipe
class MMCRTV(BasicNewsRecipe):
title = u'MMC RTV Slovenija'
__author__ = u'BlonG'
description = u"Prvi interaktivni multimedijski portal, MMC RTV Slovenija"
oldest_article = 3
max_articles_per_feed = 20
language = 'sl'
no_stylesheets = True
use_embedded_content = False
cover_url = 'https://sites.google.com/site/javno2010/home/rtv_slo_cover.jpg'
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
def print_version(self, url):
split_url = url.split("/")
print_url = 'http://www.rtvslo.si/index.php?c_mod=news&op=print&id=' + split_url[-1]
return print_url
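# e.g. a hypothetical article url http://www.rtvslo.si/slovenija/naslov/241234
# splits on '/' and becomes
# http://www.rtvslo.si/index.php?c_mod=news&op=print&id=241234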
keep_only_tags = [
dict(name='div', attrs={'class':'title'}),
dict(name='div', attrs={'id':'newsbody'}),
dict(name='div', attrs={'id':'newsblocks'}),
]
# remove_tags=[
# dict(name='div', attrs={'id':'newsblocks'}),
# ]
feeds = [
(u'Slovenija', u'http://www.rtvslo.si/feeds/01.xml'),
(u'Svet', u'http://www.rtvslo.si/feeds/02.xml'),
(u'Evropska unija', u'http://www.rtvslo.si/feeds/16.xml'),
(u'Gospodarstvo', u'http://www.rtvslo.si/feeds/04.xml'),
(u'\u010crna kronika', u'http://www.rtvslo.si/feeds/08.xml'),
(u'Okolje', u'http://www.rtvslo.si/feeds/12.xml'),
(u'Znanost in tehnologija', u'http://www.rtvslo.si/feeds/09.xml'),
(u'Zabava', u'http://www.rtvslo.si/feeds/06.xml'),
(u'Ture avanture', u'http://www.rtvslo.si/feeds/28.xml'),
]
# def preprocess_html(self, soup):
# newsblocks = soup.find('div', attrs = {'id':'newsblocks'})
# soup.find('div', attrs = {'id':'newsbody'}).insert(-1, newsblocks)
# return soup

View File

@ -0,0 +1,56 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
http://www.montevideo.com.uy
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Noticias(BasicNewsRecipe):
title = 'Montevideo COMM'
__author__ = 'Gustavo Azambuja'
description = 'Noticias de Uruguay'
language = 'es'
timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False
recursion = 5
encoding = 'utf-8'
remove_javascript = True
no_stylesheets = True
oldest_article = 2
max_articles_per_feed = 100
keep_only_tags = [dict(id=['txt'])]
remove_tags = [
dict(name=['object','link'])
]
remove_attributes = ['width','height', 'style', 'font', 'color']
extra_css = '''
h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
p {font-family:Arial,Helvetica,sans-serif;}
'''
feeds = [
(u'Destacados', u'http://www.montevideo.com.uy/anxml.aspx?58'),
(u'Noticias', u'http://www.montevideo.com.uy/anxml.aspx?59'),
(u'Tecnologia', u'http://www.montevideo.com.uy/anxml.aspx?133'),
(u'Tiempo Libre', u'http://www.montevideo.com.uy/anxml.aspx?60'),
# (u'Deportes', u'http://www.montevideo.com.uy/anxml.aspx?968'),
# (u'Pantallazo', u'http://www.montevideo.com.uy/anxml.aspx?1022'),
(u'Gastronomia', u'http://www.montevideo.com.uy/anxml.aspx?1023')
]
def get_cover_url(self):
return 'http://sphotos.ak.fbcdn.net/hphotos-ak-snc1/hs276.snc1/10319_147339559330_147337559330_2625816_6636564_n.jpg'
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -1,30 +1,32 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
moscowtimes.ru
www.themoscowtimes.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Moscowtimes(BasicNewsRecipe):
title = u'The Moscow Times'
title = 'The Moscow Times'
__author__ = 'Darko Miletic and Sujata Raman'
description = 'News from Russia'
description = 'The Moscow Times is a daily English-language newspaper featuring objective, reliable news on business, politics, sports and culture in Moscow, in Russia and the former Soviet Union (CIS).'
category = 'Russia, Moscow, Russian news, Moscow news, Russian newspaper, daily news, independent news, reliable news, USSR, Soviet Union, CIS, Russian politics, Russian business, Russian culture, Russian opinion, St Petersburg, Saint Petersburg'
publisher = 'The Moscow Times'
language = 'en'
lang = 'en'
oldest_article = 7
oldest_article = 2
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
#encoding = 'utf-8'
encoding = 'cp1252'
remove_javascript = True
remove_empty_feeds = True
encoding = 'cp1251'
masthead_url = 'http://www.themoscowtimes.com/bitrix/templates/tmt/img/logo.gif'
publication_type = 'newspaper'
conversion_options = {
'comment' : description
, 'language' : lang
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
extra_css = '''
@ -35,39 +37,37 @@ class Moscowtimes(BasicNewsRecipe):
.text{font-family:Arial,Tahoma,Verdana,Helvetica,sans-serif ; font-size:75%; }
'''
feeds = [
(u'The Moscow Times Top Stories' , u'http://www.themoscowtimes.com/rss/top'),
(u'The Moscow Times Current Issue' , u'http://www.themoscowtimes.com/rss/issue'),
(u'The Moscow Times News' , u'http://www.themoscowtimes.com/rss/news'),
(u'The Moscow Times Business' , u'http://www.themoscowtimes.com/rss/business'),
(u'The Moscow Times Art and Ideas' , u'http://www.themoscowtimes.com/rss/art'),
(u'The Moscow Times Opinion' , u'http://www.themoscowtimes.com/rss/opinion')
]
keep_only_tags = [
dict(name='div', attrs={'class':['newstextblock']})
(u'Top Stories' , u'http://www.themoscowtimes.com/rss/top' )
,(u'Current Issue' , u'http://www.themoscowtimes.com/rss/issue' )
,(u'News' , u'http://www.themoscowtimes.com/rss/news' )
,(u'Business' , u'http://www.themoscowtimes.com/rss/business')
,(u'Art and Ideas' , u'http://www.themoscowtimes.com/rss/art' )
,(u'Opinion' , u'http://www.themoscowtimes.com/rss/opinion' )
]
keep_only_tags = [dict(name='div', attrs={'id':'content'})]
remove_tags = [
dict(name='div', attrs={'class':['photo_nav']})
dict(name='div', attrs={'class':['photo_nav','phototext']})
,dict(name=['iframe','meta','base','link','embed','object'])
]
def preprocess_html(self, soup):
soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
soup.head.insert(0,mtag)
return self.adeify_images(soup)
for lnk in soup.findAll('a'):
if lnk.string is not None:
ind = self.tag_to_string(lnk)
lnk.replaceWith(ind)
return soup
def print_version(self, url):
return url.replace('.themoscowtimes.com/','.themoscowtimes.com/print/')
def get_cover_url(self):
cover_url = None
href = 'http://www.themoscowtimes.com/pdf/'
soup = self.index_to_soup(href)
div = soup.find('div',attrs={'class':'left'})
if div:
a = div.find('a')
print a
if a :
cover_url = a.img['src']
cover_url = 'http://www.themoscowtimes.com' + a.img['src']
return cover_url

View File

@ -6,6 +6,8 @@ __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
sankei.jp.msn.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class MSNSankeiNewsProduct(BasicNewsRecipe):
title = u'MSN\u7523\u7d4c\u30cb\u30e5\u30fc\u30b9(\u65b0\u5546\u54c1)'
__author__ = 'Hiroshi Miura'

View File

@ -0,0 +1,68 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Mateusz Kielar, matek09@gmail.com'
from calibre.web.feeds.news import BasicNewsRecipe
class Newsweek(BasicNewsRecipe):
EDITION = 0
title = u'Newsweek Polska'
__author__ = 'Mateusz Kielar'
description = 'Weekly magazine'
encoding = 'utf-8'
no_stylesheets = True
language = 'pl'
remove_javascript = True
keep_only_tags =[]
keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'article'}))
remove_tags =[]
remove_tags.append(dict(name = 'div', attrs = {'class' : 'copy'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'url'}))
extra_css = '''
.body {font-size: small}
.author {font-size: x-small}
.lead {font-size: x-small}
.title{font-size: x-large; font-weight: bold}
'''
def print_version(self, url):
return url.replace("http://www.newsweek.pl/artykuly/wydanie/" + str(self.EDITION), "http://www.newsweek.pl/artykuly") + '/print'
def find_last_full_issue(self):
page = self.index_to_soup('http://www.newsweek.pl/Frames/IssueCover.aspx')
issue = 'http://www.newsweek.pl/Frames/' + page.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href']
page = self.index_to_soup(issue)
issue = 'http://www.newsweek.pl/Frames/' + page.find(lambda tag: tag.name == 'span' and not tag.attrs).a['href']
page = self.index_to_soup(issue)
self.EDITION = page.find('a', attrs={'target' : '_parent'})['href'].replace('/wydania/','')
def parse_index(self):
self.find_last_full_issue()
soup = self.index_to_soup('http://www.newsweek.pl/wydania/' + str(self.EDITION))
img = soup.find('img', id="ctl00_C1_PaperIsssueView_IssueImage", src=True)
self.cover_url = img['src']
feeds = []
parent = soup.find(id='content-left-big')
for txt in parent.findAll(attrs={'class':'txt_normal_red strong'}):
section = self.tag_to_string(txt).capitalize()
articles = list(self.find_articles(txt))
feeds.append((section, articles))
return feeds
def find_articles(self, txt):
for a in txt.findAllNext( attrs={'class':['strong','hr']}):
if a.name in "div":
break
yield {
'title' : self.tag_to_string(a),
'url' : 'http://www.newsweek.pl'+a['href'],
'date' : '',
'description' : ''
}

View File

@ -6,6 +6,8 @@ __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
www.nikkei.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class NikkeiNet(BasicNewsRecipe):
title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(Free)'
__author__ = 'Hiroshi Miura'

View File

@ -0,0 +1,125 @@
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
import mechanize
from calibre.ptempfile import PersistentTemporaryFile
class NikkeiNet_subscription(BasicNewsRecipe):
title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248'
__author__ = 'Hiroshi Miura'
description = 'News and current market affairs from Japan'
needs_subscription = True
oldest_article = 2
max_articles_per_feed = 20
language = 'ja'
remove_javascript = False
temp_files = []
remove_tags_before = {'class':"cmn-section cmn-indent"}
remove_tags = [
{'class':"JSID_basePageMove JSID_baseAsyncSubmit cmn-form_area JSID_optForm_utoken"},
{'class':"cmn-article_keyword cmn-clearfix"},
{'class':"cmn-print_headline cmn-clearfix"},
]
remove_tags_after = {'class':"cmn-pr_list"}
def get_browser(self):
br = BasicNewsRecipe.get_browser()
cj = mechanize.LWPCookieJar()
br.set_cookiejar(cj)
#br.set_debug_http(True)
#br.set_debug_redirects(True)
#br.set_debug_responses(True)
if self.username is not None and self.password is not None:
#print "----------------------------get login form--------------------------------------------"
# open login form
br.open('https://id.nikkei.com/lounge/nl/base/LA0010.seam')
response = br.response()
#print "----------------------------get login form---------------------------------------------"
#print "----------------------------set login form---------------------------------------------"
# remove disabled input which brings error on mechanize
response.set_data(response.get_data().replace("<input id=\"j_id48\"", "<!-- "))
response.set_data(response.get_data().replace("gm_home_on.gif\" />", " -->"))
br.set_response(response)
br.select_form(name='LA0010Form01')
br['LA0010Form01:LA0010Email'] = self.username
br['LA0010Form01:LA0010Password'] = self.password
br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
br.submit()
br.response()
#print "----------------------------send login form---------------------------------------------"
#print "----------------------------open news main page-----------------------------------------"
# open news site
br.open('http://www.nikkei.com/')
br.response()
#print "----------------------------www.nikkei.com BODY --------------------------------------"
#print response2.get_data()
#print "-------------------------^^-got auto redirect form----^^--------------------------------"
# forced redirect in default
br.select_form(nr=0)
br.submit()
response3 = br.response()
# return some cookie which should be set by Javascript
#print response3.geturl()
raw = response3.get_data()
#print "---------------------------response to form --------------------------------------------"
# grab cookie from JS and set it
redirectflag = re.search(r"var checkValue = '(\d+)';", raw, re.M).group(1)
br.select_form(nr=0)
self.temp_files.append(PersistentTemporaryFile('_fa.html'))
self.temp_files[-1].write("#LWP-Cookies-2.0\n")
self.temp_files[-1].write("Set-Cookie3: Cookie-dummy=Cookie-value; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
self.temp_files[-1].write("Set-Cookie3: redirectFlag="+redirectflag+"; domain=\".nikkei.com\"; path=\"/\"; path_spec; secure; expires=\"2029-12-21 05:07:59Z\"; version=0\n")
self.temp_files[-1].close()
cj.load(self.temp_files[-1].name)
br.submit()
#br.set_debug_http(False)
#br.set_debug_redirects(False)
#br.set_debug_responses(False)
return br
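# The Set-Cookie3 lines above hand-craft an LWP-format cookies file so that
# cj.load() injects the redirectFlag value scraped from the page's
# Javascript; with that cookie in the jar, the final submit() can complete
# the forced redirect that the site normally drives from script.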
feeds = [ (u'\u65e5\u7d4c\u4f01\u696d', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sangyo'),
(u'\u65e5\u7d4c\u88fd\u54c1', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=newpro'),
(u'internet', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=internet'),
(u'\u653f\u6cbb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=seiji'),
(u'\u8ca1\u52d9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zaimu'),
(u'\u7d4c\u6e08', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keizai'),
(u'\u56fd\u969b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kaigai'),
(u'\u79d1\u5b66', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kagaku'),
(u'\u30de\u30fc\u30b1\u30c3\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=market'),
(u'\u304f\u3089\u3057', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kurashi'),
(u'\u30b9\u30dd\u30fc\u30c4', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sports'),
(u'\u793e\u4f1a', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shakai'),
(u'\u30a8\u30b3', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=eco'),
(u'\u5065\u5eb7', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kenkou'),
(u'\u96c7\u7528', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=koyou'),
(u'\u6559\u80b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=kyouiku'),
(u'\u304a\u304f\u3084\u307f', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=okuyami'),
(u'\u4eba\u4e8b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=zinzi'),
(u'\u7279\u96c6', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=special'),
(u'\u5730\u57df\u30cb\u30e5\u30fc\u30b9', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=local'),
(u'\u7d71\u8a08\u30fb\u767d\u66f8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=report'),
(u'\u30e9\u30f3\u30ad\u30f3\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=ranking'),
(u'\u4f1a\u898b', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=interview'),
(u'\u793e\u8aac\u30fb\u6625\u79cb', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=shasetsu'),
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30d7\u30ed\u91ce\u7403', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=baseball'),
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u5927\u30ea\u30fc\u30b0', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=mlb'),
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b5\u30c3\u30ab\u30fc', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=soccer'),
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u30b4\u30eb\u30d5', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=golf'),
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u76f8\u64b2', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=sumou'),
(u'\u30b9\u30dd\u30fc\u30c4\uff1a\u7af6\u99ac', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=keiba'),
(u'\u8abf\u67fb\u30fb\u30a2\u30f3\u30b1\u30fc\u30c8', u'http://www.zou3.net/php/rss/nikkei2rss.php?head=research')
]

View File

@ -6,13 +6,11 @@ __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
www.nikkei.com
'''
import string, re, sys
from calibre import strftime
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
import mechanize
from calibre.ptempfile import PersistentTemporaryFile
class NikkeiNet_sub_economy(BasicNewsRecipe):
title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u7d4c\u6e08)'
__author__ = 'Hiroshi Miura'
@ -70,12 +68,12 @@ class NikkeiNet_sub_economy(BasicNewsRecipe):
br['LA0010Form01:LA0010Password'] = self.password
br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
br.submit()
response1 = br.response()
br.response()
#print "----------------------------send login form---------------------------------------------"
#print "----------------------------open news main page-----------------------------------------"
# open news site
br.open('http://www.nikkei.com/')
response2 = br.response()
br.response()
#print "----------------------------www.nikkei.com BODY --------------------------------------"
#print response2.get_data()
#print "-------------------------^^-got auto redirect form----^^--------------------------------"

View File

@ -6,8 +6,7 @@ __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
www.nikkei.com
'''
import string, re, sys
from calibre import strftime
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
import mechanize
from calibre.ptempfile import PersistentTemporaryFile
@ -68,12 +67,12 @@ class NikkeiNet_sub_industory(BasicNewsRecipe):
br['LA0010Form01:LA0010Password'] = self.password
br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
br.submit()
response1 = br.response()
br.response()
#print "----------------------------send login form---------------------------------------------"
#print "----------------------------open news main page-----------------------------------------"
# open news site
br.open('http://www.nikkei.com/')
response2 = br.response()
br.response()
#print "----------------------------www.nikkei.com BODY --------------------------------------"
#print response2.get_data()
#print "-------------------------^^-got auto redirect form----^^--------------------------------"

View File

@ -6,8 +6,7 @@ __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
www.nikkei.com
'''
import string, re, sys
from calibre import strftime
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
import mechanize
from calibre.ptempfile import PersistentTemporaryFile
@ -69,12 +68,12 @@ class NikkeiNet_sub_life(BasicNewsRecipe):
br['LA0010Form01:LA0010Password'] = self.password
br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
br.submit()
response1 = br.response()
br.response()
#print "----------------------------send login form---------------------------------------------"
#print "----------------------------open news main page-----------------------------------------"
# open news site
br.open('http://www.nikkei.com/')
response2 = br.response()
br.response()
#print "----------------------------www.nikkei.com BODY --------------------------------------"
#print response2.get_data()
#print "-------------------------^^-got auto redirect form----^^--------------------------------"

View File

@ -6,8 +6,7 @@ __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
www.nikkei.com
'''
import string, re, sys
from calibre import strftime
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
import mechanize
from calibre.ptempfile import PersistentTemporaryFile
@ -62,12 +61,12 @@ class NikkeiNet_sub_main(BasicNewsRecipe):
br['LA0010Form01:LA0010Password'] = self.password
br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
br.submit()
response1 = br.response()
br.response()
#print "----------------------------send login form---------------------------------------------"
#print "----------------------------open news main page-----------------------------------------"
# open news site
br.open('http://www.nikkei.com/')
response2 = br.response()
br.response()
#print "----------------------------www.nikkei.com BODY --------------------------------------"
#print response2.get_data()
#print "-------------------------^^-got auto redirect form----^^--------------------------------"

View File

@ -6,8 +6,7 @@ __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
www.nikkei.com
'''
import string, re, sys
from calibre import strftime
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
import mechanize
from calibre.ptempfile import PersistentTemporaryFile
@ -69,12 +68,12 @@ class NikkeiNet_sub_sports(BasicNewsRecipe):
br['LA0010Form01:LA0010Password'] = self.password
br.form.find_control(id='LA0010Form01:LA0010AutoLoginOn',type="checkbox").get(nr=0).selected = True
br.submit()
response1 = br.response()
br.response()
#print "----------------------------send login form---------------------------------------------"
#print "----------------------------open news main page-----------------------------------------"
# open news site
br.open('http://www.nikkei.com/')
response2 = br.response()
br.response()
#print "----------------------------www.nikkei.com BODY --------------------------------------"
#print response2.get_data()
#print "-------------------------^^-got auto redirect form----^^--------------------------------"

View File

@ -0,0 +1,36 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#Based on Lars Jacob's Taz Digiabo recipe
__license__ = 'GPL v3'
__copyright__ = '2010, Starson17'
import os, urllib2, zipfile
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
class NowToronto(BasicNewsRecipe):
title = u'Now Toronto'
description = u'Now Toronto'
__author__ = 'Starson17'
language = 'en_CA'
conversion_options = {
'no_default_epub_cover' : True
}
def build_index(self):
epub_feed = "http://feeds.feedburner.com/NowEpubEditions"
soup = self.index_to_soup(epub_feed)
url = soup.find(name = 'feedburner:origlink').string
f = urllib2.urlopen(url)
tmp = PersistentTemporaryFile(suffix='.epub')
self.report_progress(0,_('downloading epub'))
tmp.write(f.read())
tmp.close()
zfile = zipfile.ZipFile(tmp.name, 'r')
self.report_progress(0,_('extracting epub'))
zfile.extractall(self.output_dir)
tmp.close()
index = os.path.join(self.output_dir, 'content.opf')
self.report_progress(1,_('epub downloaded and extracted'))
return index
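# Rather than downloading individual articles, this build_index() override
# fetches the publisher's ready-made EPUB, extracts it into the output
# directory and returns the path of its OPF, which stands in for the index
# that calibre would normally build itself.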

View File

@ -7,14 +7,22 @@ nytimes.com
'''
import re, string, time
from calibre import entity_to_unicode, strftime
from datetime import timedelta, date
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup
class NYTimes(BasicNewsRecipe):
# set headlinesOnly to True for the headlines-only version
# set headlinesOnly to True for the headlines-only version. If True, webEdition is ignored.
headlinesOnly = True
# set webEdition to True for the Web edition of the newspaper. Set oldest_article to the
# number of days old an article can be for inclusion. If oldest_article = 0 all articles
# will be included. Note: oldest_article is ignored if webEdition = False
webEdition = False
oldest_article = 7
# includeSections: List of sections to include. If empty, all sections found will be included.
# Otherwise, only the sections named will be included. For example,
#
@ -39,20 +47,76 @@ class NYTimes(BasicNewsRecipe):
# from an article (if one exists). If one_picture_per_article = True, the image
# will be moved to a location between the headline and the byline.
# If one_picture_per_article = False, all images from the article will be included
# and shown in their original location.
one_picture_per_article = True
one_picture_per_article = False
# The maximum number of articles that will be downloaded
max_articles_per_feed = 100
# Whether to omit duplicates of articles (typically arising when articles are indexed in
# more than one section). If True, only the first occurrence will be downloaded.
filterDuplicates = True
# Sections to collect for the Web edition.
# Delete any you don't want, or use includeSections or excludeSections
web_sections = [(u'World',u'world'),
(u'U.S.',u'national'),
(u'Politics',u'politics'),
(u'New York',u'nyregion'),
(u'Business','business'),
(u'Technology',u'technology'),
(u'Sports',u'sports'),
(u'Science',u'science'),
(u'Health',u'health'),
(u'Opinion',u'opinion'),
(u'Arts',u'arts'),
(u'Books',u'books'),
(u'Movies',u'movies'),
(u'Music',u'arts/music'),
(u'Television',u'arts/television'),
(u'Style',u'style'),
(u'Dining & Wine',u'dining'),
(u'Fashion & Style',u'fashion'),
(u'Home & Garden',u'garden'),
(u'Travel',u'travel'),
('Education',u'education'),
('Multimedia',u'multimedia'),
(u'Obituaries',u'obituaries'),
(u'Sunday Magazine',u'magazine'),
(u'Week in Review',u'weekinreview')]
if headlinesOnly:
title='New York Times Headlines'
description = 'Headlines from the New York Times'
needs_subscription = False
elif webEdition:
title='New York Times (Web)'
description = 'New York Times on the Web'
needs_subscription = True
else:
title='New York Times'
description = 'Today\'s New York Times'
needs_subscription = True
month_list = ['january','february','march','april','may','june','july','august','september','october','november','december']
def decode_us_date(self,datestr):
udate = datestr.strip().lower().split()
try:
m = self.month_list.index(udate[0])+1
except:
return date.today()
d = int(udate[1])
y = int(udate[2])
try:
d = date(y,m,d)
except:
d = date.today()
return d
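# e.g. decode_us_date('November 22 2010') returns datetime.date(2010, 11, 22);
# anything unparseable falls back to today's date.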
earliest_date = date.today() - timedelta(days=oldest_article)
__author__ = 'GRiker/Kovid Goyal/Nick Redding'
language = 'en'
@ -136,6 +200,12 @@ class NYTimes(BasicNewsRecipe):
.image {text-align: center;}
.source {text-align: left; }'''
articles = {}
key = None
ans = []
url_list = []
def filter_ans(self, ans) :
total_article_count = 0
idx = 0
@ -164,6 +234,29 @@ class NYTimes(BasicNewsRecipe):
self.log( "Queued %d articles" % total_article_count )
return ans
def exclude_url(self,url):
if not url.startswith("http"):
return True
if not url.endswith(".html"):
return True
if 'nytimes.com' not in url:
return True
if 'podcast' in url:
return True
if '/video/' in url:
return True
if '/slideshow/' in url:
return True
if '/magazine/index' in url:
return True
if '/interactive/' in url:
return True
if '/reference/' in url:
return True
if '/premium/' in url:
return True
return False
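# Urls that survive exclude_url() (it returns False) are ordinary
# nytimes.com article pages ending in .html; podcast, video, slideshow,
# interactive, reference and premium links are all screened out before
# being queued.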
def fixChars(self,string):
# Replace lsquo (\x91)
fixed = re.sub("\x91","",string)
@ -249,7 +342,6 @@ class NYTimes(BasicNewsRecipe):
return BeautifulSoup(_raw, markupMassage=massage)
# Entry point
print "index_to_soup()"
soup = get_the_soup( self.encoding, url_or_raw )
contentType = soup.find(True,attrs={'http-equiv':'Content-Type'})
docEncoding = str(contentType)[str(contentType).find('charset=') + len('charset='):str(contentType).rfind('"')]
@ -273,33 +365,24 @@ class NYTimes(BasicNewsRecipe):
else:
return description
def parse_todays_index(self):
def feed_title(div):
def feed_title(self,div):
return ''.join(div.findAll(text=True, recursive=True)).strip()
articles = {}
key = None
ans = []
url_list = []
def handle_article(div):
def handle_article(self,div):
thumbnail = div.find('div','thumbnail')
if thumbnail:
thumbnail.extract()
a = div.find('a', href=True)
if not a:
return
url = re.sub(r'\?.*', '', a['href'])
if not url.startswith("http"):
return
if not url.endswith(".html"):
return
if 'podcast' in url:
return
if '/video/' in url:
if self.exclude_url(url):
return
url += '?pagewanted=all'
if url in url_list:
if self.filterDuplicates:
if url in self.url_list:
return
url_list.append(url)
self.url_list.append(url)
title = self.tag_to_string(a, use_alt=True).strip()
description = ''
pubdate = strftime('%a, %d %b')
@ -314,42 +397,78 @@ class NYTimes(BasicNewsRecipe):
authorAttribution = div.find(True, attrs={'class':'byline'})
if authorAttribution:
author = self.tag_to_string(authorAttribution, use_alt=False)
feed = key if key is not None else 'Uncategorized'
if not articles.has_key(feed):
ans.append(feed)
articles[feed] = []
articles[feed].append(
feed = self.key if self.key is not None else 'Uncategorized'
if not self.articles.has_key(feed):
self.ans.append(feed)
self.articles[feed] = []
self.articles[feed].append(
dict(title=title, url=url, date=pubdate,
description=description, author=author,
content=''))
def parse_web_edition(self):
for (sec_title,index_url) in self.web_sections:
if self.includeSections != []:
if sec_title not in self.includeSections:
print "SECTION NOT INCLUDED: ",sec_title
continue
if sec_title in self.excludeSections:
print "SECTION EXCLUDED: ",sec_title
continue
print 'Index URL: '+'http://www.nytimes.com/pages/'+index_url+'/index.html'
soup = self.index_to_soup('http://www.nytimes.com/pages/'+index_url+'/index.html')
self.key = sec_title
# Find each article
for div in soup.findAll(True,
attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
if div['class'] in ['story', 'story headline'] :
self.handle_article(div)
elif div['class'] == 'headlinesOnly multiline flush':
for lidiv in div.findAll('li'):
self.handle_article(lidiv)
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
return self.filter_ans(self.ans)
def parse_todays_index(self):
soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
skipping = False
# Find each article
for div in soup.findAll(True,
attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
if div['class'] in ['section-headline','sectionHeader']:
key = string.capwords(feed_title(div))
key = key.replace('Op-ed','Op-Ed')
key = key.replace('U.s.','U.S.')
self.key = string.capwords(self.feed_title(div))
self.key = self.key.replace('Op-ed','Op-Ed')
self.key = self.key.replace('U.s.','U.S.')
self.key = self.key.replace('N.y.','N.Y.')
skipping = False
if self.includeSections != []:
if self.key not in self.includeSections:
print "SECTION NOT INCLUDED: ",self.key
skipping = True
if self.key in self.excludeSections:
print "SECTION EXCLUDED: ",self.key
skipping = True
elif div['class'] in ['story', 'story headline'] :
handle_article(div)
if not skipping:
self.handle_article(div)
elif div['class'] == 'headlinesOnly multiline flush':
for lidiv in div.findAll('li'):
handle_article(lidiv)
if not skipping:
self.handle_article(lidiv)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return self.filter_ans(ans)
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
return self.filter_ans(self.ans)
def parse_headline_index(self):
articles = {}
ans = []
url_list = []
soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')
# Fetch the content table
@ -363,15 +482,24 @@ class NYTimes(BasicNewsRecipe):
for td_col in content_table.findAll('td', {'id' : re.compile('Column')}):
for div_sec in td_col.findAll('div',recursive=False):
for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}):
section_name = self.tag_to_string(h6_sec_name,use_alt=False)
section_name = re.sub(r'^ *$','',section_name)
if section_name == '':
continue
if self.includeSections != []:
if section_name not in self.includeSections:
print "SECTION NOT INCLUDED: ",section_name
continue
if section_name in self.excludeSections:
print "SECTION EXCLUDED: ",section_name
continue
section_name=string.capwords(section_name)
if section_name == 'U.s.':
section_name = 'U.S.'
elif section_name == 'Op-ed':
section_name = 'Op-Ed'
section_name = section_name.replace('Op-ed','Op-Ed')
section_name = section_name.replace('U.s.','U.S.')
section_name = section_name.replace('N.y.','N.Y.')
pubdate = strftime('%a, %d %b')
search_div = div_sec
@ -392,37 +520,32 @@ class NYTimes(BasicNewsRecipe):
if not a:
continue
url = re.sub(r'\?.*', '', a['href'])
if not url.startswith("http"):
continue
if not url.endswith(".html"):
continue
if 'podcast' in url:
continue
if 'video' in url:
if self.exclude_url(url):
continue
url += '?pagewanted=all'
if url in url_list:
if self.filterDuplicates:
if url in self.url_list:
continue
url_list.append(url)
self.log("URL %s" % url)
self.url_list.append(url)
title = self.tag_to_string(a, use_alt=True).strip()
desc = h3_item.find('p')
if desc is not None:
description = self.tag_to_string(desc,use_alt=False)
else:
description = ''
if not articles.has_key(section_name):
ans.append(section_name)
articles[section_name] = []
articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
if not self.articles.has_key(section_name):
self.ans.append(section_name)
self.articles[section_name] = []
self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return self.filter_ans(ans)
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
return self.filter_ans(self.ans)
def parse_index(self):
if self.headlinesOnly:
return self.parse_headline_index()
elif self.webEdition:
return self.parse_web_edition()
else:
return self.parse_todays_index()
@ -438,6 +561,21 @@ class NYTimes(BasicNewsRecipe):
def preprocess_html(self, soup):
if self.webEdition and (self.oldest_article > 0):
date_tag = soup.find(True,attrs={'class': ['dateline','date']})
if date_tag:
date_str = self.tag_to_string(date_tag,use_alt=False)
date_str = date_str.replace('Published:','')
date_items = date_str.split(',')
try:
datestring = date_items[0]+' '+date_items[1]
article_date = self.decode_us_date(datestring)
except:
article_date = date.today()
if article_date < self.earliest_date:
self.log("Skipping article dated %s" % date_str)
return None
kicker_tag = soup.find(attrs={'class':'kicker'})
if kicker_tag: # remove Op_Ed author head shots
tagline = self.tag_to_string(kicker_tag)
@ -462,7 +600,6 @@ class NYTimes(BasicNewsRecipe):
for inlineImg in inlineImgs[1:]:
inlineImg.extract()
# Move firstImg before article body
#article_body = soup.find(True, {'id':'articleBody'})
cgFirst = soup.find(True, {'class':re.compile('columnGroup *first')})
if cgFirst:
# Strip all sibling NavigableStrings: noise
@ -548,4 +685,3 @@ class NYTimes(BasicNewsRecipe):
divTag.replaceWith(tag)
return soup

View File

@ -7,14 +7,22 @@ nytimes.com
'''
import re, string, time
from calibre import entity_to_unicode, strftime
from datetime import timedelta, date
from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, Tag, BeautifulStoneSoup
class NYTimes(BasicNewsRecipe):
# set headlinesOnly to True for the headlines-only version
# set headlinesOnly to True for the headlines-only version. If True, webEdition is ignored.
headlinesOnly = False
# set webEdition to True for the Web edition of the newspaper. Set oldest_article to the
# number of days old an article can be for inclusion. If oldest_article = 0 all articles
# will be included. Note: oldest_article is ignored if webEdition = False
webEdition = False
oldest_article = 7
# includeSections: List of sections to include. If empty, all sections found will be included.
# Otherwise, only the sections named will be included. For example,
#
@ -39,20 +47,76 @@ class NYTimes(BasicNewsRecipe):
# from an article (if one exists). If one_picture_per_article = True, the image
# will be moved to a location between the headline and the byline.
# If one_picture_per_article = False, all images from the article will be included
# and shown in their original location.
one_picture_per_article = True
one_picture_per_article = False
# The maximum number of articles that will be downloaded
max_articles_per_feed = 100
# Whether to omit duplicates of articles (typically arising when articles are indexed in
# more than one section). If True, only the first occurrence will be downloaded.
filterDuplicates = True
# Sections to collect for the Web edition.
# Delete any you don't want, or use includeSections or excludeSections
web_sections = [(u'World',u'world'),
(u'U.S.',u'national'),
(u'Politics',u'politics'),
(u'New York',u'nyregion'),
(u'Business','business'),
(u'Technology',u'technology'),
(u'Sports',u'sports'),
(u'Science',u'science'),
(u'Health',u'health'),
(u'Opinion',u'opinion'),
(u'Arts',u'arts'),
(u'Books',u'books'),
(u'Movies',u'movies'),
(u'Music',u'arts/music'),
(u'Television',u'arts/television'),
(u'Style',u'style'),
(u'Dining & Wine',u'dining'),
(u'Fashion & Style',u'fashion'),
(u'Home & Garden',u'garden'),
(u'Travel',u'travel'),
('Education',u'education'),
('Multimedia',u'multimedia'),
(u'Obituaries',u'obituaries'),
(u'Sunday Magazine',u'magazine'),
(u'Week in Review',u'weekinreview')]
if headlinesOnly:
title='New York Times Headlines'
description = 'Headlines from the New York Times'
needs_subscription = False
elif webEdition:
title='New York Times (Web)'
description = 'New York Times on the Web'
needs_subscription = True
else:
title='New York Times'
description = 'Today\'s New York Times'
needs_subscription = True
month_list = ['january','february','march','april','may','june','july','august','september','october','november','december']
def decode_us_date(self,datestr):
udate = datestr.strip().lower().split()
try:
m = self.month_list.index(udate[0])+1
except:
return date.today()
d = int(udate[1])
y = int(udate[2])
try:
d = date(y,m,d)
except:
d = date.today()
return d
earliest_date = date.today() - timedelta(days=oldest_article)
__author__ = 'GRiker/Kovid Goyal/Nick Redding'
language = 'en'
@ -60,7 +124,6 @@ class NYTimes(BasicNewsRecipe):
timefmt = ''
needs_subscription = True
masthead_url = 'http://graphics8.nytimes.com/images/misc/nytlogo379x64.gif'
cover_margins = (18,18,'grey99')
@ -137,6 +200,12 @@ class NYTimes(BasicNewsRecipe):
.image {text-align: center;}
.source {text-align: left; }'''
articles = {}
key = None
ans = []
url_list = []
def filter_ans(self, ans) :
total_article_count = 0
idx = 0
@ -165,6 +234,29 @@ class NYTimes(BasicNewsRecipe):
self.log( "Queued %d articles" % total_article_count )
return ans
def exclude_url(self,url):
if not url.startswith("http"):
return True
if not url.endswith(".html"):
return True
if 'nytimes.com' not in url:
return True
if 'podcast' in url:
return True
if '/video/' in url:
return True
if '/slideshow/' in url:
return True
if '/magazine/index' in url:
return True
if '/interactive/' in url:
return True
if '/reference/' in url:
return True
if '/premium/' in url:
return True
return False
def fixChars(self,string):
# Replace lsquo (\x91)
fixed = re.sub("\x91","",string)
@ -250,7 +342,6 @@ class NYTimes(BasicNewsRecipe):
return BeautifulSoup(_raw, markupMassage=massage)
# Entry point
print "index_to_soup()"
soup = get_the_soup( self.encoding, url_or_raw )
contentType = soup.find(True,attrs={'http-equiv':'Content-Type'})
docEncoding = str(contentType)[str(contentType).find('charset=') + len('charset='):str(contentType).rfind('"')]
@ -274,33 +365,24 @@ class NYTimes(BasicNewsRecipe):
else:
return description
def parse_todays_index(self):
def feed_title(div):
def feed_title(self,div):
return ''.join(div.findAll(text=True, recursive=True)).strip()
articles = {}
key = None
ans = []
url_list = []
def handle_article(div):
def handle_article(self,div):
thumbnail = div.find('div','thumbnail')
if thumbnail:
thumbnail.extract()
a = div.find('a', href=True)
if not a:
return
url = re.sub(r'\?.*', '', a['href'])
if not url.startswith("http"):
return
if not url.endswith(".html"):
return
if 'podcast' in url:
return
if '/video/' in url:
if self.exclude_url(url):
return
url += '?pagewanted=all'
if url in url_list:
if self.filterDuplicates:
if url in self.url_list:
return
url_list.append(url)
self.url_list.append(url)
title = self.tag_to_string(a, use_alt=True).strip()
description = ''
pubdate = strftime('%a, %d %b')
@ -315,42 +397,78 @@ class NYTimes(BasicNewsRecipe):
authorAttribution = div.find(True, attrs={'class':'byline'})
if authorAttribution:
author = self.tag_to_string(authorAttribution, use_alt=False)
feed = key if key is not None else 'Uncategorized'
if not articles.has_key(feed):
ans.append(feed)
articles[feed] = []
articles[feed].append(
feed = self.key if self.key is not None else 'Uncategorized'
if not self.articles.has_key(feed):
self.ans.append(feed)
self.articles[feed] = []
self.articles[feed].append(
dict(title=title, url=url, date=pubdate,
description=description, author=author,
content=''))
def parse_web_edition(self):
for (sec_title,index_url) in self.web_sections:
if self.includeSections != []:
if sec_title not in self.includeSections:
print "SECTION NOT INCLUDED: ",sec_title
continue
if sec_title in self.excludeSections:
print "SECTION EXCLUDED: ",sec_title
continue
print 'Index URL: '+'http://www.nytimes.com/pages/'+index_url+'/index.html'
soup = self.index_to_soup('http://www.nytimes.com/pages/'+index_url+'/index.html')
self.key = sec_title
# Find each article
for div in soup.findAll(True,
attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
if div['class'] in ['story', 'story headline'] :
self.handle_article(div)
elif div['class'] == 'headlinesOnly multiline flush':
for lidiv in div.findAll('li'):
self.handle_article(lidiv)
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
return self.filter_ans(self.ans)
def parse_todays_index(self):
soup = self.index_to_soup('http://www.nytimes.com/pages/todayspaper/index.html')
skipping = False
# Find each article
for div in soup.findAll(True,
attrs={'class':['section-headline', 'story', 'story headline','sectionHeader','headlinesOnly multiline flush']}):
if div['class'] in ['section-headline','sectionHeader']:
key = string.capwords(feed_title(div))
key = key.replace('Op-ed','Op-Ed')
key = key.replace('U.s.','U.S.')
self.key = string.capwords(self.feed_title(div))
self.key = self.key.replace('Op-ed','Op-Ed')
self.key = self.key.replace('U.s.','U.S.')
self.key = self.key.replace('N.y.','N.Y.')
skipping = False
if self.includeSections != []:
if self.key not in self.includeSections:
print "SECTION NOT INCLUDED: ",self.key
skipping = True
if self.key in self.excludeSections:
print "SECTION EXCLUDED: ",self.key
skipping = True
elif div['class'] in ['story', 'story headline'] :
handle_article(div)
if not skipping:
self.handle_article(div)
elif div['class'] == 'headlinesOnly multiline flush':
for lidiv in div.findAll('li'):
handle_article(lidiv)
if not skipping:
self.handle_article(lidiv)
ans = [(key, articles[key]) for key in ans if articles.has_key(key)]
return self.filter_ans(ans)
self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
return self.filter_ans(self.ans)
def parse_headline_index(self):
articles = {}
ans = []
url_list = []
soup = self.index_to_soup('http://www.nytimes.com/pages/todaysheadlines/')
# Fetch the content table
@ -364,15 +482,24 @@ class NYTimes(BasicNewsRecipe):
for td_col in content_table.findAll('td', {'id' : re.compile('Column')}):
for div_sec in td_col.findAll('div',recursive=False):
for h6_sec_name in div_sec.findAll('h6',{'style' : re.compile('text-transform: *uppercase')}):
section_name = self.tag_to_string(h6_sec_name,use_alt=False)
section_name = re.sub(r'^ *$','',section_name)
if section_name == '':
continue
if self.includeSections != []:
if section_name not in self.includeSections:
print "SECTION NOT INCLUDED: ",section_name
continue
if section_name in self.excludeSections:
print "SECTION EXCLUDED: ",section_name
continue
section_name=string.capwords(section_name)
if section_name == 'U.s.':
section_name = 'U.S.'
elif section_name == 'Op-ed':
section_name = 'Op-Ed'
section_name = section_name.replace('Op-ed','Op-Ed')
section_name = section_name.replace('U.s.','U.S.')
section_name = section_name.replace('N.y.','N.Y.')
pubdate = strftime('%a, %d %b')
search_div = div_sec
@ -393,37 +520,32 @@ class NYTimes(BasicNewsRecipe):
if not a:
continue
url = re.sub(r'\?.*', '', a['href'])
                        if self.exclude_url(url):
                            continue
                        url += '?pagewanted=all'
                        if self.filterDuplicates:
                            if url in self.url_list:
                                continue
                        self.log("URL %s" % url)
                        self.url_list.append(url)
title = self.tag_to_string(a, use_alt=True).strip()
desc = h3_item.find('p')
if desc is not None:
description = self.tag_to_string(desc,use_alt=False)
else:
description = ''
                        if not self.articles.has_key(section_name):
                            self.ans.append(section_name)
                            self.articles[section_name] = []
                        self.articles[section_name].append(dict(title=title, url=url, date=pubdate, description=description, author=author, content=''))
        self.ans = [(k, self.articles[k]) for k in self.ans if self.articles.has_key(k)]
        return self.filter_ans(self.ans)
def parse_index(self):
if self.headlinesOnly:
return self.parse_headline_index()
elif self.webEdition:
return self.parse_web_edition()
else:
return self.parse_todays_index()
@ -439,6 +561,21 @@ class NYTimes(BasicNewsRecipe):
def preprocess_html(self, soup):
        if self.webEdition and (self.oldest_article>0):
date_tag = soup.find(True,attrs={'class': ['dateline','date']})
if date_tag:
date_str = self.tag_to_string(date_tag,use_alt=False)
date_str = date_str.replace('Published:','')
date_items = date_str.split(',')
try:
datestring = date_items[0]+' '+date_items[1]
article_date = self.decode_us_date(datestring)
except:
article_date = date.today()
if article_date < self.earliest_date:
self.log("Skipping article dated %s" % date_str)
return None
kicker_tag = soup.find(attrs={'class':'kicker'})
if kicker_tag: # remove Op_Ed author head shots
tagline = self.tag_to_string(kicker_tag)
@ -463,7 +600,6 @@ class NYTimes(BasicNewsRecipe):
for inlineImg in inlineImgs[1:]:
inlineImg.extract()
# Move firstImg before article body
cgFirst = soup.find(True, {'class':re.compile('columnGroup *first')})
if cgFirst:
# Strip all sibling NavigableStrings: noise

View File

@ -0,0 +1,63 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
observa.com.uy
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Noticias(BasicNewsRecipe):
title = 'Observa Digital'
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
description = 'Noticias desde Uruguay'
language = 'es'
timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False
recursion = 5
encoding = 'utf8'
remove_javascript = True
no_stylesheets = True
oldest_article = 2
max_articles_per_feed = 100
keep_only_tags = [dict(id=['contenido'])]
remove_tags = [
dict(name='div', attrs={'id':'contenedorVinculadas'}),
dict(name='p', attrs={'id':'nota_firma'}),
dict(name=['object','link'])
]
remove_attributes = ['width','height', 'style', 'font', 'color']
extra_css = '''
h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
p {font-family:Arial,Helvetica,sans-serif;}
'''
feeds = [
(u'Actualidad', u'http://www.observa.com.uy/RSS/actualidad.xml'),
(u'Deportes', u'http://www.observa.com.uy/RSS/deportes.xml'),
(u'Vida', u'http://www.observa.com.uy/RSS/vida.xml'),
(u'Ciencia y Tecnologia', u'http://www.observa.com.uy/RSS/ciencia.xml')
]
def get_cover_url(self):
cover_url = None
index = 'http://www.elobservador.com.uy/elobservador/nav_portada.asp?suplemento=dia'
soup = self.index_to_soup(index)
link_item = soup.find('img',attrs={'usemap':'#mapeo_imagenes'})
if link_item:
cover_url = 'http://www.elobservador.com.uy'+link_item['src'].strip()
print cover_url
return cover_url
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -21,8 +21,16 @@ class Pagina12(BasicNewsRecipe):
use_embedded_content = False
language = 'es'
remove_empty_feeds = True
publication_type = 'newspaper'
masthead_url = 'http://www.pagina12.com.ar/commons/imgs/logo-home.gif'
extra_css = """
body{font-family: Arial,Helvetica,sans-serif }
img{margin-bottom: 0.4em; display:block}
#autor{font-weight: bold}
#fecha,#epigrafe{font-size: 0.9em; margin: 5px}
#imagen{border: 1px solid black; margin: 0 0 1.25em 1.25em; width: 232px }
.fgprincipal{font-size: large; font-weight: bold}
"""
conversion_options = {
'comment' : description
@ -31,7 +39,11 @@ class Pagina12(BasicNewsRecipe):
, 'language' : language
}
remove_tags = [
dict(name=['meta','link'])
,dict(name='div', attrs={'id':['volver','logo','logo_suple','fin','permalink']})
]
remove_attributes=['lang']
feeds = [
@ -65,4 +77,13 @@ class Pagina12(BasicNewsRecipe):
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll('span', attrs={'id':'seccion'}):
it = item.a
it.name='span'
del it['href']
del it['title']
for item in soup.findAll('p'):
it = item.find('h3')
if it:
it.name='span'
return soup

View File

@ -0,0 +1,70 @@
#!/usr/bin/env python
from calibre.web.feeds.recipes import BasicNewsRecipe
class PCLab(BasicNewsRecipe):
cover_url = 'http://pclab.pl/img/logo.png'
title = u"PC Lab"
__author__ = 'ravcio - rlelusz[at]gmail.com'
description = u"Articles from PC Lab website"
language = 'pl'
oldest_article = 30.0
max_articles_per_feed = 100
recursions = 0
encoding = 'iso-8859-2'
no_stylesheets = True
remove_javascript = True
use_embedded_content = False
keep_only_tags = [
dict(name='div', attrs={'class':['substance']})
]
remove_tags = [
dict(name='div', attrs={'class':['chapters']})
,dict(name='div', attrs={'id':['script_bxad_slot_display_list_bxad_slot']})
]
remove_tags_after = [
dict(name='div', attrs={'class':['navigation']})
]
#links to RSS feeds
feeds = [ ('PCLab', u'http://pclab.pl/xml/artykuly.xml') ]
#load second and subsequent page content
# in: soup - full page with 'next' button
# out: appendtag - tag to which new page is to be added
def append_page(self, soup, appendtag):
# find the 'Next' button
pager = soup.find('div', attrs={'class':'next'})
if pager:
#search for 'a' element with link to next page (exit if not found)
a = pager.find('a')
if a:
nexturl = a['href']
soup2 = self.index_to_soup('http://pclab.pl/' + nexturl)
pagetext_substance = soup2.find('div', attrs={'class':'substance'})
pagetext = pagetext_substance.find('div', attrs={'class':'data'})
pagetext.extract()
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
pos = len(appendtag.contents)
self.append_page(soup2, appendtag)
def preprocess_html(self, soup):
        # soup.body contains neither the title nor the page navigation; they live elsewhere in soup
self.append_page(soup, soup.body)
# finally remove some tags
tags = soup.findAll('div',attrs={'class':['tags', 'index', 'script_bxad_slot_display_list_bxad_slot', 'index first', 'zumi', 'navigation']})
[tag.extract() for tag in tags]
return soup
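
# The append_page helper above follows PCLab's 'next' link recursively and
# splices each page's story body onto the first page. Below is a minimal
# standalone sketch of the same stitching pattern, assuming a hypothetical
# fetch(url) callable that returns a parsed BeautifulSoup page and the
# div.data / div.next class names used above.

def stitch_pages(fetch, first_url):
    soup = fetch(first_url)
    body = soup.find('div', attrs={'class':'data'})
    pager = soup.find('div', attrs={'class':'next'})
    while pager is not None and pager.a is not None:
        next_soup = fetch(pager.a['href'])
        part = next_soup.find('div', attrs={'class':'data'})
        part.extract()                          # detach from its own page...
        body.insert(len(body.contents), part)   # ...and append to the first
        pager = next_soup.find('div', attrs={'class':'next'})
    return soup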

View File

@ -1,13 +1,10 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008-2009, Darko Miletic <darko.miletic at gmail.com>'
__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
'''
politika.rs
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class Politika(BasicNewsRecipe):
title = 'Politika Online'
@ -19,53 +16,51 @@ class Politika(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
remove_javascript = True
encoding = 'utf8'
delay = 1
language = 'sr'
publication_type = 'newspaper'
masthead_url = 'http://static.politika.co.rs/images_new/politika.gif'
extra_css = """
@font-face {font-family: "serif1";src:url(res:///opt/sony/ebook/FONT/tt0011m_.ttf)}
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
body{font-family: Arial,Helvetica,sans1,sans-serif}
h1{font-family: "Times New Roman",Times,serif1,serif}
.articledescription{font-family: sans1, sans-serif}
"""
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'pretty_print' : True
, 'language' : language
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
keep_only_tags = [dict(name='div', attrs={'class':'big_article_home item_details'})]
remove_tags_after = dict(attrs={'class':'online_date'})
remove_tags = [dict(name=['link','meta','iframe','embed','object'])]
feeds = [
(u'Politika' , u'http://www.politika.rs/rubrike/Politika/index.1.lt.xml' )
,(u'Svet' , u'http://www.politika.rs/rubrike/Svet/index.1.lt.xml' )
,(u'Redakcijski komentari', u'http://www.politika.rs/rubrike/redakcijski-komentari/index.1.lt.xml')
,(u'Ostali komentari' , u'http://www.politika.rs/rubrike/ostali-komentari/index.1.lt.xml' )
,(u'Pogledi' , u'http://www.politika.rs/pogledi/index.lt.xml' )
,(u'Pogledi sa strane', u'http://www.politika.rs/rubrike/Pogledi-sa-strane/index.1.lt.xml')
,(u'Tema dana' , u'http://www.politika.rs/rubrike/tema-dana/index.1.lt.xml' )
,(u'Kultura' , u'http://www.politika.rs/rubrike/Kultura/index.1.lt.xml' )
,(u'Zivot i stil' , u'http://www.politika.rs/rubrike/zivot-i-stil/index.1.lt.xml' )
,(u'Spektar' , u'http://www.politika.rs/rubrike/zivot-i-stil/index.1.lt.xml' )
]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
for item in soup.findAll('a', attrs={'class':'category'}):
item.name='span'
if item.has_key('href'):
del item['href']
if item.has_key('title'):
del item['title']
return soup

View File

@ -0,0 +1,68 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2010, Mateusz Kielar, matek09@gmail.com'
from calibre.web.feeds.news import BasicNewsRecipe
class Polityka(BasicNewsRecipe):
title = u'Polityka'
__author__ = 'Mateusz Kielar'
description = 'Weekly magazine. Last archive issue'
encoding = 'utf-8'
no_stylesheets = True
    language = 'pl'
remove_javascript = True
    remove_tags_before = dict(name = 'h2', attrs = {'class' : 'box_nag'})
    remove_tags_after = dict(name = 'div', attrs = {'class' : 'box_footer'})
remove_tags =[]
remove_tags.append(dict(name = 'h2', attrs = {'class' : 'box_nag'}))
remove_tags.append(dict(name = 'div', attrs = {'class' : 'box_footer'}))
extra_css = '''
h1 {font-size: x-large; font-weight: bold}
'''
def parse_index(self):
soup = self.index_to_soup('http://archiwum.polityka.pl/')
box_img3 = soup.findAll(attrs={'class' : 'box_img3'})
feeds = []
last = 0
self.cover_url = 'http://archiwum.polityka.pl' + box_img3[-1].find('img')['src']
last_edition = 'http://archiwum.polityka.pl' + box_img3[-1].find('a')['href']
while True:
index = self.index_to_soup(last_edition)
box_list = index.findAll('div', attrs={'class' : 'box_list'})
if len(box_list) == 0:
break
articles = {}
for box in box_list:
for div in box.findAll('div', attrs={'class': 'list_tresc'}):
article_page = self.index_to_soup('http://archiwum.polityka.pl' + div.a['href'],)
section = self.tag_to_string(article_page.find('h2', attrs = {'class' : 'box_nag'})).split('/')[0].lstrip().rstrip()
print section
if not articles.has_key(section):
articles[section] = []
articles[section].append( {
'title' : self.tag_to_string(div.a),
'url' : 'http://archiwum.polityka.pl' + div.a['href'],
'date' : '',
'description' : ''
})
for section in articles:
feeds.append((section, articles[section]))
last_edition = last_edition.replace('http://archiwum.polityka.pl/wydanie/' + str(last), 'http://archiwum.polityka.pl/wydanie/' + str(last + 1))
last = last + 1
return feeds

View File

@ -0,0 +1,18 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1290013720(BasicNewsRecipe):
title = u'RDS'
__author__ = 'Nexus'
language = 'en_CA'
description = 'Hockey News'
oldest_article = 7
max_articles_per_feed = 25
no_stylesheets = True
remove_tags = [dict(name='div', attrs={'id':'rdsWrap'}),
dict(name='table', attrs={'id':'aVoir'}),
dict(name='div', attrs={'id':'imageChronique'})]
keep_only_tags = [dict(name='div', attrs={'id':['enteteChronique']}),
dict(name='div', attrs={'id':['contenuChronique']})]
feeds = [(u'RDS', u'http://www.rds.ca/hockey/fildepresse_rds.xml')]

View File

@ -0,0 +1,54 @@
#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = '2010, Gustavo Azambuja <hola at gazambuja.com>'
'''
http://www.revistabla.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Noticias(BasicNewsRecipe):
title = 'Revista Bla'
__author__ = 'Gustavo Azambuja'
description = 'Moda | Uruguay'
language = 'es'
timefmt = '[%a, %d %b, %Y]'
use_embedded_content = False
recursion = 5
encoding = 'utf8'
remove_javascript = True
no_stylesheets = True
oldest_article = 20
max_articles_per_feed = 100
keep_only_tags = [dict(id=['body_container'])]
remove_tags = [
dict(name='div', attrs={'class':['date_text', 'comments', 'form_section', 'share_it']}),
dict(name='div', attrs={'id':['relatedPosts', 'spacer', 'banner_izquierda', 'right_container']}),
dict(name='p', attrs={'class':'FacebookLikeButton'}),
dict(name=['object','link']) ]
extra_css = '''
h1{font-family:Geneva, Arial, Helvetica, sans-serif;color:#154B7A;}
h3{font-size: 14px;color:#999999; font-family:Geneva, Arial, Helvetica, sans-serif;font-weight: bold;}
h2{color:#666666; font-family:Geneva, Arial, Helvetica, sans-serif;font-size:small;}
p {font-family:Arial,Helvetica,sans-serif;}
'''
feeds = [
(u'Articulos', u'http://www.revistabla.com/feed/')
]
def get_cover_url(self):
cover_url = None
index = 'http://www.revistabla.com'
soup = self.index_to_soup(index)
link_item = soup.find('div',attrs={'class':'header_right'})
if link_item:
cover_url = link_item.img['src']
return cover_url
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -108,3 +108,10 @@ class RevistaMuyInteresante(BasicNewsRecipe):
feeds.append((title, articles))
return feeds
def get_cover_url(self):
index = 'http://www.muyinteresante.es/revista'
soup = self.index_to_soup(index)
link_item = soup.find('img',attrs={'class':'img_portada'})
if link_item:
cover_url = "http://www.muyinteresante.es"+link_item['src']
return cover_url

View File

@ -0,0 +1,69 @@
__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
rollingstone.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class RollingStone(BasicNewsRecipe):
title = 'Rolling Stone Magazine - free content'
__author__ = 'Darko Miletic'
description = 'Rolling Stone Magazine features music, album and artist news, movie reviews, political, economic and pop culture commentary, videos, photos, and more.'
publisher = 'Werner Media inc.'
category = 'news, music, USA, world'
oldest_article = 15
max_articles_per_feed = 200
no_stylesheets = True
encoding = 'utf8'
use_embedded_content = False
language = 'en'
remove_empty_feeds = True
publication_type = 'magazine'
masthead_url = 'http://www.rollingstone.com/templates/rolling-stone-templates/theme/rstheme/images/rsLogo.png'
extra_css = """
body{font-family: Georgia,Times,serif }
img{margin-bottom: 0.4em; display:block}
"""
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
preprocess_regexps = [
(re.compile(r'xml:lang="en">.*?<head>', re.DOTALL|re.IGNORECASE),lambda match: 'xml:lang="en">\n<head>\n')
,(re.compile(r'</title>.*?</head>' , re.DOTALL|re.IGNORECASE),lambda match: '</title>\n</head>\n' )
]
keep_only_tags=[
dict(attrs={'class':['headerImgHolder','headerContent']})
,dict(name='div',attrs={'id':['teaser','storyTextContainer']})
,dict(name='div',attrs={'class':'blogDetailModule clearfix'})
]
remove_tags = [
dict(name=['meta','iframe','object','embed'])
,dict(attrs={'id':'mpStoryHeader'})
,dict(attrs={'class':'relatedTopics'})
]
remove_attributes=['lang','onclick','width','height','name']
remove_tags_before=dict(attrs={'class':'bloggerInfo'})
remove_tags_after=dict(attrs={'class':'relatedTopics'})
feeds = [
(u'All News' , u'http://www.rollingstone.com/siteServices/rss/allNews' )
,(u'All Blogs' , u'http://www.rollingstone.com/siteServices/rss/allBlogs' )
,(u'Movie Reviews' , u'http://www.rollingstone.com/siteServices/rss/movieReviews' )
,(u'Album Reviews' , u'http://www.rollingstone.com/siteServices/rss/albumReviews' )
,(u'Song Reviews' , u'http://www.rollingstone.com/siteServices/rss/songReviews' )
]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return soup

View File

@ -0,0 +1,73 @@
from calibre.web.feeds.news import BasicNewsRecipe, LoginFailed
class SCPrintMagazine(BasicNewsRecipe):
title = u'SC Print Magazine'
__author__ = u'Tony Maro'
description = u'Last print version of the data security magazine'
INDEX = "http://www.scmagazineus.com/issuearchive/"
no_stylesheets = True
language = 'en'
keep_only_tags = [dict(id=['article','review'])]
remove_tags = [dict(id=['articlePrintTools','reviewBodyColumn'])]
LOG_IN = 'http://www.scmagazineus.com/login/'
tags = 'News,SC Magazine'
needs_subscription = True
def parse_index(self):
articles = []
issuelink = printsections = None
soup = self.index_to_soup(self.INDEX)
sectit = soup.find('div', attrs={'class':'issueArchiveItem'})
if sectit is not None:
linkt = sectit.find('a')
issuelink = linkt['href']
imgt = sectit.find('img')
self.cover_url = imgt['src']
if issuelink is not None:
issue = self.index_to_soup(issuelink)
if issue is not None:
printsections = issue.findAll('div',attrs={'class':'PrintSection'})
if printsections is not None:
for printsection in printsections:
onesection = []
sectiontitle = printsection.find('h3').contents[0]
articlesec = printsection.findAll('div',attrs={'class':'IssueArchiveFormat'})
if articlesec is not None:
''' got articles '''
for onearticle in articlesec:
''' process one article '''
arttitlet = onearticle.find('h3')
if arttitlet is not None:
mylink = arttitlet.find('a')
if mylink is not None:
if mylink.has_key('title'):
arttitle = mylink['title']
else:
arttitle = 'unknown'
if mylink.has_key('href'):
artlink = mylink['href']
artlink = artlink.replace("/article","/printarticle")
artlink = artlink.replace("/review","/printreview")
deck = onearticle.find('div',attrs={'class':'deck'})
if deck is not None:
deck = deck.contents[0]
onesection.append({'title':arttitle, 'url':artlink, 'description':deck,'date':''})
articles.append((sectiontitle, onesection))
return articles
def get_browser(self):
br = BasicNewsRecipe.get_browser(self)
br.open(self.LOG_IN)
br.select_form(name='aspnetForm')
br['ctl00$ctl00$cphAllPageContent$cphMainContent$SubscriberEasyLoginView1$txtEmail'] = self.username
br['ctl00$ctl00$cphAllPageContent$cphMainContent$SubscriberEasyLoginView1$txtPassword'] = self.password
raw = br.submit("ctl00$ctl00$cphAllPageContent$cphMainContent$SubscriberEasyLoginView1$btnLogin").read()
if 'Logout</a>' not in raw:
raise LoginFailed(
_('Failed to log in, check your username and password for'
' the calibre Periodicals service.'))
return br
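
# Recipes that set needs_subscription = True receive their username/password
# from calibre and must log in inside get_browser() before any fetching
# happens, as SCPrintMagazine does above with its ASP.NET form. A minimal
# sketch of that pattern; the login URL, form name and field names below are
# hypothetical placeholders.
from calibre.web.feeds.news import BasicNewsRecipe

class LoginSketch(BasicNewsRecipe):
    title = 'Login sketch'
    needs_subscription = True

    def get_browser(self):
        br = BasicNewsRecipe.get_browser(self)
        br.open('http://example.com/login/')
        br.select_form(name='loginForm')   # hypothetical form name
        br['user'] = self.username         # hypothetical field names
        br['pass'] = self.password
        br.submit()
        return br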

View File

@ -0,0 +1,55 @@
# coding: utf-8
__license__ = 'GPL v3'
__copyright__ = '2010, BlonG'
'''
www.siol.si
'''
from calibre.web.feeds.news import BasicNewsRecipe
class Siol(BasicNewsRecipe):
title = u'Siol.net'
__author__ = u'BlonG'
description = "Multimedijski portal z aktualnimi vsebinami, intervjuji, komentarji iz Slovenije in sveta, sportal, trendi, avtomoto, blogos"
oldest_article = 3
language = 'sl'
max_articles_per_feed = 20
no_stylesheets = True
use_embedded_content = False
cover_url = 'https://sites.google.com/site/javno2010/home/siol_cover.jpg'
extra_css = '''
h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
'''
html2lrf_options = ['--base-font-size', '10']
keep_only_tags = [
dict(name='div', attrs={'id':'idContent'}),
]
remove_tags = [
dict(name='span', attrs={'class':'com1'}),
dict(name='div', attrs={'class':'relation'}),
dict(name='p', attrs={'class':'path'}),
dict(name='div', attrs={'class':'clear_r'}),
dict(name='div', attrs={'id':'appendix'}),
dict(name='div', attrs={'id':'rail'}),
dict(name='div', attrs={'id':'div_comments'}),
dict(name='div', attrs={'class':'thumbs'}),
]
feeds = [
(u'Slovenija', u'http://www.siol.net/rss.aspx?path=Slovenija')
,(u'Lokalne novice', u'http://www.siol.net/rss.aspx?path=Slovenija/Lokalne_novice')
,(u'EU', u'http://www.siol.net/rss.aspx?path=EU')
,(u'Svet', u'http://www.siol.net/rss.aspx?path=Svet')
,(u'Gospodarstvo', u'http://www.siol.net/rss.aspx?path=Gospodarstvo')
,(u'Sportal', u'http://www.siol.net/rss.aspx?path=Sportal')
,(u'Trendi', u'http://www.siol.net/rss.aspx?path=Trendi')
,(u'Avtomoto', u'http://www.siol.net/rss.aspx?path=Avtomoto')
,(u'Tehnologija', u'http://www.siol.net/rss.aspx?path=Tehnologija')
,(u'TV / Film', u'http://www.siol.net/rss.aspx?path=TV')
]

View File

@ -6,6 +6,7 @@ __copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
spiegel.de
'''
from time import strftime
from calibre.web.feeds.news import BasicNewsRecipe
class Spiegel_ger(BasicNewsRecipe):
@ -44,3 +45,6 @@ class Spiegel_ger(BasicNewsRecipe):
rmain, rsep, rrest = main.rpartition(',')
purl = rmain + ',druck-' + rrest + ',' + rest
return purl
def get_cover_url(self):
return 'http://wissen.spiegel.de/wissen/titel/SP/' + strftime("%Y/%W/%j/titel.jpg")

View File

@ -7,7 +7,7 @@ class AdvancedUserRecipe1284927619(BasicNewsRecipe):
__author__ = 'noxxx'
max_articles_per_feed = 100
description = 'tagesanzeiger.ch: Nichts verpassen'
category = 'News, Politik, Nachrichten, Schweiz, Zuerich'
language = 'de'
conversion_options = {

View File

@ -3,12 +3,12 @@
__license__ = 'GPL v3'
__copyright__ = '2009, Gerhard Aigner <gerhard.aigner at gmail.com>'
''' http://www.derstandard.at - Austrian Newspaper '''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class TelepolisNews(BasicNewsRecipe):
title = u'Telepolis (News+Artikel)'
__author__ = 'Gerhard Aigner'
publisher = 'Heise Zeitschriften Verlag GmbH & Co KG'
description = 'News from telepolis'
@ -26,10 +26,10 @@ class TelepolisNews(BasicNewsRecipe):
preprocess_regexps = [(re.compile(r'<a[^>]*>', re.DOTALL|re.IGNORECASE), lambda match: ''),
(re.compile(r'</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),]
keep_only_tags = [dict(name = 'td',attrs={'class':'bloghead'}),dict(name = 'td',attrs={'class':'blogfliess'})]
remove_tags = [dict(name='img'), dict(name='td',attrs={'class':'blogbottom'}), dict(name='td',attrs={'class':'forum'})]
feeds = [(u'News', u'http://www.heise.de/tp/news-atom.xml')]
html2lrf_options = [
'--comment' , description
@ -41,7 +41,7 @@ class TelepolisNews(BasicNewsRecipe):
def get_article_url(self, article):
'''if the linked article is of kind artikel don't take it'''
if (article.link.count('artikel') > 1) :
return None
return article.link
@ -49,3 +49,5 @@ class TelepolisNews(BasicNewsRecipe):
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
soup.head.insert(0,mtag)
return soup

View File

@ -6,6 +6,8 @@ __copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
www.h-online.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
class TheHeiseOnline(BasicNewsRecipe):
title = u'The H'
__author__ = 'Hiroshi Miura'

View File

@ -0,0 +1,19 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1289990851(BasicNewsRecipe):
title = u'The Hockey News'
language = 'en_CA'
__author__ = 'Nexus'
oldest_article = 7
max_articles_per_feed = 25
no_stylesheets = True
remove_tags = [dict(name='div', attrs={'class':'article_info'}),
dict(name='div', attrs={'class':'photo_details'}),
dict(name='div', attrs={'class':'tool_menu'}),
dict(name='div', attrs={'id':'comments_container'}),
dict(name='div', attrs={'id':'wrapper'})]
keep_only_tags = [dict(name='h1', attrs={'class':['headline']}),
dict(name='div', attrs={'class':['box_container']})]
feeds = [(u'THN', u'http://www.thehockeynews.com/rss/all_categories.xml')]

View File

@ -0,0 +1,34 @@
from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1289990851(BasicNewsRecipe):
title = u'TSN'
oldest_article = 7
max_articles_per_feed = 50
language = 'en_CA'
__author__ = 'Nexus'
no_stylesheets = True
INDEX = 'http://tsn.ca/nhl/story/?id=nhl'
keep_only_tags = [dict(name='div', attrs={'id':['tsnColWrap']}),
dict(name='div', attrs={'id':['tsnStory']})]
remove_tags = [dict(name='div', attrs={'id':'tsnRelated'}),
dict(name='div', attrs={'class':'textSize'})]
def parse_index(self):
feeds = []
soup = self.index_to_soup(self.INDEX)
feed_parts = soup.findAll('div', attrs={'class': 'feature'})
for feed_part in feed_parts:
articles = []
if not feed_part.h2:
continue
feed_title = feed_part.h2.string
article_parts = feed_part.findAll('a')
for article_part in article_parts:
article_title = article_part.string
article_date = ''
article_url = 'http://tsn.ca/' + article_part['href']
articles.append({'title': article_title, 'url': article_url, 'description':'', 'date':article_date})
if articles:
feeds.append((feed_title, articles))
return feeds

View File

@ -0,0 +1,195 @@
#!/usr/bin/env python
u'''
Ведомости
'''
from calibre.web.feeds.feedparser import parse
from calibre.ebooks.BeautifulSoup import Tag
from calibre.web.feeds.news import BasicNewsRecipe
class VedomostiRecipe(BasicNewsRecipe):
title = u'Ведомости'
__author__ = 'Nikolai Kotchetkov'
publisher = 'vedomosti.ru'
category = 'press, Russia'
description = u'Ежедневная деловая газета'
oldest_article = 3
max_articles_per_feed = 100
masthead_url = u'http://motorro.com/imgdir/logos/ved_logo_black2_cropped.gif'
cover_url = u'http://motorro.com/imgdir/logos/ved_logo_black2_cropped.gif'
#Add feed names if you want them to be sorted (feeds of this list appear first)
sortOrder = [u'_default', u'Первая полоса', u'Власть и деньги']
encoding = 'cp1251'
language = 'ru'
no_stylesheets = True
remove_javascript = True
recursions = 0
conversion_options = {
'comment' : description
, 'tags' : category
, 'publisher' : publisher
, 'language' : language
}
keep_only_tags = [dict(name='td', attrs={'class' : ['second_content']})]
remove_tags_after = [dict(name='div', attrs={'class' : 'article_text'})]
remove_tags = [dict(name='div', attrs={'class' : ['sep', 'choice', 'articleRightTbl']})]
feeds = [u'http://www.vedomosti.ru/newspaper/out/rss.xml']
#base URL for relative links
base_url = u'http://www.vedomosti.ru'
extra_css = 'h1 {font-size: 1.5em; margin: 0em 0em 0em 0em; text-align: center;}'\
'h2 {font-size: 1.0em; margin: 0em 0em 0em 0em;}'\
'h3 {font-size: 0.8em; margin: 0em 0em 0em 0em;}'\
'.article_date {font-size: 0.5em; color: gray; font-family: monospace; text-align:right;}'\
'.article_authors {font-size: 0.5em; color: gray; font-family: monospace; text-align:right;}'\
'.article_img {width:100%; text-align: center; padding: 3px 3px 3px 3px;}'\
'.article_img_desc {width:100%; text-align: center; font-size: 0.5em; color: gray; font-family: monospace;}'\
'.article_desc {font-size: 1em; font-style:italic;}'
def parse_index(self):
try:
feedData = parse(self.feeds[0])
if not feedData:
raise NotImplementedError
self.log("parse_index: Feed loaded successfully.")
if feedData.feed.has_key('title'):
self.title = feedData.feed.title
self.log("parse_index: Title updated to: ", self.title)
if feedData.feed.has_key('description'):
self.description = feedData.feed.description
self.log("parse_index: Description updated to: ", self.description)
def get_virtual_feed_articles(feed):
if feeds.has_key(feed):
return feeds[feed][1]
self.log("Adding new feed: ", feed)
articles = []
feeds[feed] = (feed, articles)
return articles
feeds = {}
#Iterate feed items and distribute articles using tags
for item in feedData.entries:
link = item.get('link', '');
title = item.get('title', '');
if '' == link or '' == title:
continue
article = {'title':title, 'url':link, 'description':item.get('description', ''), 'date':item.get('date', ''), 'content':''};
if not item.has_key('tags'):
get_virtual_feed_articles('_default').append(article)
continue
                addedToDefault = False
                for tag in item.tags:
                    term = tag.get('term', '')
                    if '' == term:
                        if not addedToDefault:
                            get_virtual_feed_articles('_default').append(article)
                            addedToDefault = True
                        continue
                    get_virtual_feed_articles(term).append(article)
#Get feed list
#Select sorted feeds first of all
result = []
for feedName in self.sortOrder:
if (not feeds.has_key(feedName)): continue
result.append(feeds[feedName])
del feeds[feedName]
result = result + feeds.values()
return result
except Exception, err:
self.log(err)
raise NotImplementedError
def preprocess_html(self, soup):
return self.adeify_images(soup)
def postprocess_html(self, soup, first_fetch):
#self.log('Original: ', soup.prettify())
#Find article
contents = soup.find('div', {'class':['article_text']})
if not contents:
self.log('postprocess_html: article div not found!')
return soup
contents.extract()
#Find title
title = soup.find('h1')
if title:
contents.insert(0, title)
#Find article image
newstop = soup.find('div', {'class':['newstop']})
if newstop:
img = newstop.find('img')
if img:
imgDiv = Tag(soup, 'div')
imgDiv['class'] = 'article_img'
if img.has_key('width'):
del(img['width'])
if img.has_key('height'):
del(img['height'])
#find description
element = img.parent.nextSibling
img.extract()
imgDiv.insert(0, img)
                    while element:
                        # advance via a saved pointer: 'continue' on a plain
                        # NavigableString would otherwise spin forever
                        nextElement = element.nextSibling
                        if isinstance(element, Tag) and 'p' == element.name:
                            element.extract()
                            element['class'] = 'article_img_desc'
                            imgDiv.insert(len(imgDiv.contents), element)
                        element = nextElement
contents.insert(1, imgDiv)
#find article abstract
abstract = soup.find('p', {'class':['subhead']})
if abstract:
abstract['class'] = 'article_desc'
contents.insert(2, abstract)
#Find article authors
authorsDiv = soup.find('div', {'class':['autors']})
if authorsDiv:
authorsP = authorsDiv.find('p')
if authorsP:
authorsP['class'] = 'article_authors'
contents.insert(len(contents.contents), authorsP)
#Fix urls that use relative path
urls = contents.findAll('a');
if urls:
for url in urls:
if not url.has_key('href'):
continue
if '/' == url['href'][0]:
url['href'] = self.base_url + url['href']
body = soup.find('td', {'class':['second_content']})
if body:
body.replaceWith(contents)
self.log('Result: ', soup.prettify())
return soup
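
# The Vedomosti recipe builds its section list dynamically: each RSS entry is
# filed under the feeds named by its tags, untagged items go to a '_default'
# feed, and sortOrder decides which sections come first. A compact sketch of
# that bucketing logic on plain dictionaries; the sample entries below are
# hypothetical.
def bucket_by_tag(entries, sort_order):
    feeds = {}
    def articles_for(name):
        if name not in feeds:
            feeds[name] = (name, [])
        return feeds[name][1]
    for e in entries:
        for t in (e.get('tags') or ['_default']):
            articles_for(t).append(e['title'])
    # sorted sections first, then whatever else turned up
    result = [feeds.pop(n) for n in sort_order if n in feeds]
    return result + list(feeds.values())

print(bucket_by_tag(
    [{'title': 'A', 'tags': ['Politics']}, {'title': 'B'}],
    ['_default', 'Politics']))
# -> [('_default', ['B']), ('Politics', ['A'])]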

View File

@ -31,8 +31,9 @@ class WashingtonPost(BasicNewsRecipe):
('Education', 'http://www.washingtonpost.com/wp-dyn/rss/education/index.xml'),
('Style',
'http://www.washingtonpost.com/wp-dyn/rss/print/style/index.xml'),
('NFL Sports',
'http://www.washingtonpost.com/wp-dyn/rss/sports/index/nfl/index.xml'),
('Redskins', 'http://www.washingtonpost.com/wp-dyn/rss/sports/redskins/index.xml'),
('Editorials', 'http://www.washingtonpost.com/wp-dyn/rss/linkset/2005/05/30/LI2005053000331.xml'),
]

View File

@ -12,6 +12,7 @@ class ZeitDe(BasicNewsRecipe):
title = 'Zeit Online'
description = 'Zeit Online'
language = 'de'
encoding = 'UTF-8'
__author__ = 'Martin Pitt, Sujata Raman, Ingo Paschke and Marc Toensing'
@ -43,7 +44,7 @@ class ZeitDe(BasicNewsRecipe):
('Sport', 'http://newsfeed.zeit.de/sport/index'),
]
extra_css = '.excerpt{font-size:1em}.reaktion,.taglist,.comments,.reponse,.responsetitle,.responsebody,.reponse,.inline,.date{display:none;}li.date{display:block}'
#filter_regexps = [r'ad.de.doubleclick.net/']
@ -55,6 +56,16 @@ class ZeitDe(BasicNewsRecipe):
ans = None
return ans
def preprocess_html(self, soup):
for tag in soup.findAll(name=['ul','li']):
tag.name = 'div'
soup.html['xml:lang'] = self.lang
soup.html['lang'] = self.lang
mtag = '<meta http-equiv="Content-Type" content="text/html; charset=' + self.encoding + '">'
soup.head.insert(0,mtag)
return soup
def get_cover_url(self):
try:
inhalt = self.index_to_soup('http://www.zeit.de/inhalt')

View File

@ -0,0 +1,63 @@
#!/usr/bin/env python
# -*- coding: utf-8 mode: python -*-
__license__ = 'GPL v3'
__copyright__ = '2010, Steffen Siebert <calibre at steffensiebert.de>'
__docformat__ = 'restructuredtext de'
__version__ = '1.1'
"""
Die Zeit EPUB
"""
import os, urllib2, zipfile, re
from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ptempfile import PersistentTemporaryFile
class ZeitEPUBAbo(BasicNewsRecipe):
title = u'Zeit Online Premium'
description = u'Das EPUB Abo der Zeit (needs subscription)'
language = 'de'
lang = 'de-DE'
__author__ = 'Steffen Siebert'
needs_subscription = True
conversion_options = {
'no_default_epub_cover' : True
}
def build_index(self):
domain = "http://premium.zeit.de"
url = domain + "/abovorteile/cgi-bin/_er_member/p4z.fpl?ER_Do=getUserData&ER_NextTemplate=login_ok"
browser = self.get_browser()
browser.add_password("http://premium.zeit.de", self.username, self.password)
try:
browser.open(url)
except urllib2.HTTPError:
self.report_progress(0,_("Can't login to download issue"))
raise ValueError('Failed to login, check your username and password')
response = browser.follow_link(text="DIE ZEIT als E-Paper")
response = browser.follow_link(url_regex=re.compile('^http://contentserver.hgv-online.de/nodrm/fulfillment\\?distributor=zeit-online&orderid=zeit_online.*'))
tmp = PersistentTemporaryFile(suffix='.epub')
self.report_progress(0,_('downloading epub'))
tmp.write(response.read())
tmp.close()
zfile = zipfile.ZipFile(tmp.name, 'r')
self.report_progress(0,_('extracting epub'))
zfile.extractall(self.output_dir)
tmp.close()
index = os.path.join(self.output_dir, 'content.opf')
self.report_progress(1,_('epub downloaded and extracted'))
return index
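
# Unlike the feed-based recipes, this one overrides build_index() entirely:
# it downloads the publisher's ready-made EPUB, unpacks it into the output
# directory (an EPUB is just a zip archive), and returns the path of the OPF
# file as the index. A compact sketch of that shape; fetch_epub() is a
# hypothetical stand-in for the authenticated download above, and the OPF is
# assumed to sit at content.opf as in the recipe.
import os, zipfile

def build_index_sketch(output_dir, fetch_epub):
    epub_path = fetch_epub()                 # path of the downloaded .epub
    with zipfile.ZipFile(epub_path, 'r') as zf:
        zf.extractall(output_dir)            # EPUB == zip: unpack in place
    return os.path.join(output_dir, 'content.opf')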

View File

@ -363,13 +363,16 @@
</xsl:template>
<xsl:template match="rtf:row">
<xsl:element name="row">
<xsl:element name="tr">
<xsl:apply-templates/>
</xsl:element>
</xsl:template>
<xsl:template match="rtf:cell">
<xsl:element name="cell">
<xsl:element name="td">
<xsl:if test="@class">
<xsl:attribute name="class"><xsl:value-of select="@class"/></xsl:attribute>
</xsl:if>
<xsl:apply-templates/>
</xsl:element>
</xsl:template>
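
# This hunk maps the intermediate rtf:row/rtf:cell elements onto HTML tr/td,
# carrying each cell's class attribute through when present, which is what
# enables the table border styles mentioned in the changelog. A small lxml
# sketch of the same mapping; the namespace URI, stylesheet and input are
# minimal stand-ins, not calibre's real RTF pipeline.
from lxml import etree

XSL = '''<xsl:stylesheet version="1.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:rtf="http://example.com/rtf2xml">
  <xsl:template match="rtf:row">
    <tr><xsl:apply-templates/></tr>
  </xsl:template>
  <xsl:template match="rtf:cell">
    <td>
      <xsl:if test="@class">
        <xsl:attribute name="class"><xsl:value-of select="@class"/></xsl:attribute>
      </xsl:if>
      <xsl:apply-templates/>
    </td>
  </xsl:template>
</xsl:stylesheet>'''

src = ('<rtf:row xmlns:rtf="http://example.com/rtf2xml">'
       '<rtf:cell class="border">x</rtf:cell></rtf:row>')
out = etree.XSLT(etree.XML(XSL))(etree.XML(src))
print(str(out))  # <tr><td class="border">x</td></tr> (plus XML declaration)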

File diff suppressed because it is too large

File diff suppressed because one or more lines are too long

View File

@ -1,13 +1,13 @@
// For questions about the Bengali hyphenation patterns
// ask Santhosh Thottingal (santhosh dot thottingal at gmail dot com)
Hyphenator.languages['bn'] = {
leftmin : 2,
rightmin : 2,
shortestPattern : 1,
longestPattern : 1,
specialChars : unescape("আঅইঈউঊঋএঐঔকগখঘঙচছজঝঞটঠডঢণতথদধনপফবভমযরলশষসহিীাুূৃোোৈৌৗ্ঃং%u200D"),
patterns : {
2 : "অ1আ1ই1ঈ1উ1ঊ1ঋ1এ1ঐ1ঔ1ি1া1ী1ু1ৃ1ে1ো1ৌ1ৗ1্2ঃ1ং11ক1গ1খ1ঘ1ঙ1চ1ছ1জ1ঝ1ঞ1ট1ঠ1ড1ঢ1ণ1ত1থ1দ1ধ1ন1প1ফ1ব1ভ1ম1য1র1ল1শ1ষ1স1হ",
3 : "2ঃ12ং1"
}
};
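
# These Hyphenator.js tables are Liang-style hyphenation patterns: each
# pattern is letters with interleaved digits, an odd digit votes for a break
# at that position and an even digit vetoes it, patterns are grouped by their
# letter length, '_' marks a word boundary, and leftmin/rightmin keep breaks
# away from the word edges. A small Python sketch of how such patterns are
# applied; the two ASCII patterns passed in below are made up for
# illustration, not taken from any real table.
def hyphenate(word, patterns, leftmin=2, rightmin=2):
    w = '_' + word.lower() + '_'        # '_' marks the word boundaries
    points = [0] * (len(w) + 1)         # points[i] = strongest vote before w[i]
    for pat in patterns:
        letters = ''.join(c for c in pat if not c.isdigit())
        i = w.find(letters)
        while i != -1:
            pos = i
            for c in pat:               # walk the pattern, recording digits
                if c.isdigit():
                    points[pos] = max(points[pos], int(c))
                else:
                    pos += 1
            i = w.find(letters, i + 1)
    out = []
    for i, ch in enumerate(word):
        # the gap before word[i] is points[i + 1] (offset by the leading '_')
        if leftmin <= i <= len(word) - rightmin and points[i + 1] % 2 == 1:
            out.append('-')
        out.append(ch)
    return ''.join(out)

print(hyphenate('hyphenation', ['hy3ph', 'he2n', '1tion']))  # hy-phena-tion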

View File

@ -1,11 +1,11 @@
// For questions about the Czech hyphenation patterns
// ask Martin Hasoň (martin dot hason at gmail dot com)
Hyphenator.languages['cs'] = {
leftmin : 2,
rightmin : 2,
shortestPattern : 1,
longestPattern : 6,
specialChars : "ěščřžýáíéúůťď",
patterns : {
2 : "a11f1g1k1n1pu11vy11zé11ňó11š1ť1ú1ž",
3 : "_a2_b2_c2_d2_e2_g2_h2_i2_j2_k2_l2_m2_o2_p2_r2_s2_t2_u2_v2_z2_č2_é2_í2_ó2_š2_ú2_ž22a_a2da2ga2ia2ka2ra2sa2ta2u2av2aya2ča2ňa2ť2b_b1db1h1bib1j2bkb1m2bn1bob2z1bá1bí2bň2c_1ca2cc1ce1ci2cl2cn1coc2p2ctcy21cá1cí2cň1ců2d_1dad1bd1d1de1did1j2dkd1m2dn1dod1t1dud2v1dy1dá1dé1dě1dí2dň1dů1dý2e_e1ae1be1ee1ie2ke1o2ere1se1te1ue1áe2ňe1ře2šeú12f_f2l2fn2fr2fs2ft2féf2ú2g_2gngo12h_h2bh2c2hd2hkh2mh2rh1č2hňhř2h2ž2i_i1ai1bi1di1hi1ji1li1mi2ni1oi1ri1ti1xi1ái2ďi1éi1ói1ři2ši2ž2j_j2d1jij1j2jkj2m2jn2jp2jz2jď1jí2jž2k_k2dk2e2kf2kkk2l2kn2ks2kčk2ň2l_2lf2lg2lh1li2lj2lk2ll2ln2lp2lv2lz2lň1lů1lý2m_1ma1me2mf1mim2l2mn1mo2mp1mu2mv2mz2mčm2ž2n_2nb2nf2ngn1j2nk2nn2nz2nď2nónů22nž2o_o1ao1cog2o1ho1io1jo1lo1mo2no1oo1to2uo1xo2zo1čo2ňo1ř2p_2pkp2l2pn2pp2ptpá12pč2pš2pťqu22r_r1br1cr1d2rkr1l2rn2rrr1x2rzr1č2ró2rš2s_s2cs2d1se2sf1sis2js2k2sn1sos2p1sr2ss1sus2v1sé1sí2sň2sť1sůs2ž2t_1te2tf2tg1ti2tl2tm2tn1to2tpt2vt2č1té1tě2tř2tš1tů2u_u2b2ufu2ku2mu2nu2pu2ru2su2vu2zu2ču2ďu2ňu2šu2ž2v_2vkv2l2vm2vnv2p2vňwe22x_2xf2xnx1ty2ay2ey2sy2ňy2šyž22z_2zbz2ez2j2zl2ztz2v2zzzá12zč2zňz2řá1bá1dá1já1sá2ňá1řá2š2č_1ča2čb1če1či2čk2čn1čoč2p2čs1ču1čá1čí1čů2ď_1ďa1ďoé2dé2fé2lé2mé2sé2té2šé2žě1cě1lě2vě2zě1řě2šě2ťě2ží1bí1hí1jí1lí1rí1tí2ňí1ří2š2ň_2ňa2ňk2ňmň1só2z2ř_2řc2řdři12řk2řn1řoř2v2řz2řš2š_2šl2šnš2p2štš2vš2ň2ť_2ťk2ťm2ťtú2dú2kú2lú2nú2pú2tú2vú2zú2čú2žů1bů1cůt2ů2vů2zů2žý1bý1dý1hý1jý1lý2ný1rý1tý1uý1ř2ž_2žk2žl2žnž2v2žď2žň2žš",

View File

@ -1,10 +1,10 @@
Hyphenator.languages['da'] = {
leftmin : 2,
rightmin : 2,
shortestPattern : 2,
longestPattern : 8,
specialChars : "æøå",
patterns : {
3 : "a3ca1ea3ha3ja5oa5z1ba4bd1be1bib1j1bo4bsb5t3bub5w1by1ce3chck35cy3dad1b1ded1fd1gd3h1did3jd1kd1ld1m3dod1p1dud1v3dye3ee1he5x1faf1bf1d1fef1ff1gf1h1fif1k3fl1fof1p4ft1fuf1v3fy1gag1bg1d1geg3fg1gg1h1gi5gjg3kg1lg1m3gog3p1grg3v1gyi1ai3bi1ci3hi5ii5ji1uj5kj3rk5bk3hk1kk1tl1bl1fl3hl3jl1ll3r4ls1mam1bm3d1mem3fm1gm3h1mim3km1lm1mm1n3mom1r3my3nan1bn1c4nd1nen1f1nin1mn1n1non5pn3r4ns3nyn3zo3ao1co1eo5ho1jo3t3pap3dp3fp3mp3np1t1pup5vqu4r1br1fr1hr1lr1nr3pr1rs1d1ses1fs1msp44tbt1ht1mt1n4tsu1au1eu3iu5qv5hv5jv5kvl41vov5pv5t3vuy3ay3ey5o5bæ3dæ3døe3æe5å3fæ3fø3gæ3gåi3ø3kø3kå1mæ3mø3må3næ5nøo5åpå31sæ1sø5våæ3cæ3eæ5iæ5oø3eå1då1eå5hå3lå3t",
4 : "_ae3_om1_po15adg5afgaf3r5afsa4gia4gya5kaa3kea5kraku5a3laa1lea1lial3ka1loa3lua1lya3nu3anva5pea3pia5poa1ra1arba1re5arga1ria3roa3saa3sca1sia3ska3soa1tea1tia1toa5tra1tua5vaa1vebe1k4b1n1br4bs5kb3sob1stby5s4c1c4ch_ci4oda4sd1d4de5ddi1edi5l4d1n4dopd5ovd5rud4smd4sud3tad1tedt5od5trdt5udub5e5ade3afe5age3ake1ale3ane5ape3ate3blebs3e1cie4do3effe3fr3efte3gue3inei5se3jee1kae3kee3kle5kre3kue1kve5kye3lee1lie3loe5lue3lyem1s4enne4noe5nue5ole3ope1ore3ovepi3e1pre3rae1ree1rier1ker3se5rye1tae1tee1tie3tje1toe3tre3tue1tye3ume3un3eure1vae3vee1vifej4f1s4f3taf1tef1tif5toge3sgi4bg5ovgs1ag4segs1pgs1vg3tag1teg1tig5tog3trgt4sg3udgun5g5yd4ha_he5s4hethi4ehi3s4h3thun4hvo4i3dri1eli1eni3erif3ri3gui1kai1keik1li5koi3kuik3vi3liil3ki1loil5ui3mu5infin3si3nui3odi3ogi5oki3olion4i3oti5pii5pri3rei3riir5ti3sci3sii4smis3pi1tai1tei1tii3toi3tri1tui3tyi1vai1vei1vij3agjds1j3lej3lijre5ju3s5kapk5au5kavki3ek1le3kluk4ny5kod1konko3v1kra5kryk1siks3kks1pks5vkt5s3kur1kus3kutk4vok4vu5lab5lam1latl3dr1le_5led3len1ler1les4leuli5ol1kel1kol3kyl5mul3op3lov4l3pl4psl5sjl1tal1tel3tilt3ol3trl3tulu5ll3vel3vimi3kmi4o4mopm1pem3pim3plm1pom3prm5skms3pms5vm3tam3tem3tim3trm1ud1mul4nak1naln3drne5aneo4n4go4n1h4nimni5on1ken1kon3krn3kun5kv4n1ln3sin1tan1ten1tin3ton1trn3tun3ty4n1vo4asod5sof5ro5ino3kao1keo3kuo3lao3leo1lio1loo3luo5ly1omron3kook5o3oro5ovo3piop3lop3rop3s4or_o3rior3kor5oo3sio3soo1teo5unov4s4pec3pen1perpe5spe3u4p5h1pla5pok3potp4rop3skp5sops4pp3stpu5b5py34rafr3dr1relr1guri1er3kar1ker1kir3kurmo4r5muro1bro3pr3orr1sar1sirs4nr3spr5sur3svr1ter1tir3tort3sr5tyr3ud5rutr3var1ver3viry4ss3af1sams3aps1ar1sat4s1bsdy4s4ed4s3h1sig5sis5sit5sius5ju4sk_1skes3kl5skys1les1lislo35slus5lys4myso5k5sol3sons1pls5r4s1s44st_5stj3sto1strs1ud3suls3un3surs3ve3s4y5ta_1tag3tegteo14t1f6t3g3tid4t3k4t1l4t3pt4ra1tryt3sit3st4t1t5turt5ve1typ5udlud5rud3s3udvugs3u5guu5klu1lau1leu5lyu5peup5lu3rau3reu3rous5au3siu5sous5vu1teu1tiu1tout5r5u5vva5d1vedve3s5vet1visv3lev5livls1v5rev3stv5suy5dry3key5kiy3koy3kvy5liy5loy5muyns5y1pey3piy3rey3riy3siy3tiy5vezi5o_så3a3tøa5væe3læe3løe3røe5tæe5tøe1vægiø4g4søg5så3gø1i5tæl3væ5løsm5tån3kæn5tæo5læor3ø5præ5pædr5kær5tær5tør3vær5æl4røn5rør3rådr5års4kå3slås4næ5stø1stås5økti4øt4søt5såt3væu3læy5vææb3læg5aægs5æ5kvæ1reæ3riær5sæ5siæ3soæ3veøde5ø1jeø3keø3leøms5ø1reø3riør5oø1veå3reå5sk",
5 : "_an3k_an1s_be1t_her3_ove4_til3_yd5rab5le3abstaf4ria4gefag5inag5si3agtiais5t4alkval5siam4paar5af3a3spa3stea3stia1ta1ato5vba4tibe3robe5rube1s4be1trbi5skbo4grbo3rabo5rece5ro4d3afde5sk3drif3drivd5rosds5ands5ind1skidsu5lds5viea4laed5aredde4ed5raed3re4e1koek5sa3ekspe3ladel3akel3are1lase4lek3elem5elimel5sae4maden5ake4nanen3soer3afe4rage4rake4ref5erhve4ribero5der5over5tre3rumer5unfa4cefags3fejl1fo4rif5tvig3artgi3st4g5omgsha4g5slags3org4strheds3hi4n5ho5koho5vehund3i4bleids5ki3et_ik3reik5riiks5tik4tui3lagil3egil5ejil5elind3tings1in4svions1i5o5ri3plii3stii5suakel5ske3skke5stki3stk5lakko3ra3kortks3ank3stek5stuk4tarkti4ekt5relad3r5lagdld3st4lelele4molfin4l1go1li4galo4du4l5orlses1ls5inl4taf4m5ejm5ingmmen5mo4da4m5ovmse5sms5inm3stemu1lind5sind5sknd5spne4dan3erkn5erlne5slne5stni3stn3ordn1skuns3pon1stan5stint4suob3lio4dinod5riod5uno4geko4gelo4g5oog5reog5sk3optaor1an3ordnord5so3re_o3rego3reko3rero3retor5imor3slor3stpa5ghp5anlpe1rap4lan4ple_4pler4ples4p5p41procp5ulera5is4rarbrd4s34reksre5la5rese4ressre3st5rettri5la4rimor4ing4rinp4rintrk3sorre5sr5skrr5stur5talrt3rer5trir5trosa4ma5s4erse4se4s1g4si4bls5int1skabsk5s44snins4nit5som_3somms5oms5somt4s1op3spec4sper3s4pi1stanst5as3stat1stav1ste_1sted3stel1sten5step3stes5stetst5om1sy1s4tanvteds55tekn5termte5roti4enti3stto5rato1reto1ritor4m4trestro5vts4pats5prts5ult5udsue4t5uk4tauk4tru1reru5skaut5s43varm4v5omyk3liyk4s5yr3eky5t3r_ær5i_øv3rbrød35drøvdstå4er5øn4n5æb4s5ænså4r53værd1værkæ4gekæ4g5rælle4æn1drær4maær4moæ3steøn3støn4t3ørne3års5t",

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,20 @@
// Hyphenation patterns for Modern Monotonic Greek.
// Created by Dimitrios Filippou with some ideas borrowed from
// Yannis Haralambous, Kostis Dryllerakis and Claudio Beccari.
// From http://tug.org/svn/texhyphen/branches/ptex/hyph-utf8/tex/generic/hyph-utf8/patterns/tex/hyph-el-monoton.tex
// Converted by Pablo Rodríguez (hyphenator at pragmata dot tk)
Hyphenator.languages['el-monoton'] = Hyphenator.languages['el'] = {
leftmin : 2,
rightmin : 2,
shortestPattern : 1,
longestPattern : 7,
specialChars : "αεηιουωϊϋάέήίόύώΐΰίάύέήόώβγκδζθλμπντξρσϲςφχψ'ʼ᾿",
patterns : {
2 : "α1ε1η1ι1ο1υ1ω1ϊ1ϋ1ά1έ1ή1ί1ό1ύ1ώ1ΐ1ΰ14'4ʼ4᾿",
3 : "α2ιαα2ίά2ιά2ιά2ϊά2ϊα2υαα2ύά3υά3υε2ιε2ίε2ίέ2ιέ2ιέ2ϊέ2ϊε2υε2ύε2ύέ3υέ3υη2υη2ύη2ύή3υή3υο2ιοο2ίό2ιό2ιό2ϊό2ϊο2υοο2ύό3υό3υυ2ιυυ2ίύ3ιύ3ιααα2ϋε2ϊε2ϋοοι2αιι2άιιι2έι2οιι2όιιι2ώ_ι3_ί3_ί3η2αη2άη2άη2εη2έη2έη2οη2όη2όη2ωη2ώη2ώ_η3_ή3_ή3υ2αυυ2άυ2ουυ2όυυυ2ώ_υ3_ύ3_ύ34β_4γ_4δ_4ζ_4θ_4κ_4λ_4μ_4ν_4ξ_4π_4ρ_4σ_4ϲ_4ς_4τ_4φ_4χ_4ψ_4β'4βʼ᾿4γ'4γʼ4γ᾿4δ'4δʼ᾿4ζ'4ζʼ᾿4θ'4θʼ᾿4κ'4κʼ᾿4λ'4λʼ᾿4μ'4μʼ᾿4ν'4νʼ4ν᾿4ξ'4ξʼ᾿4π'4πʼ᾿4ρ'4ρʼ4ρ᾿4σ'4σʼ4σ᾿4ϲ'4ϲʼ4ϲ᾿4τ'4τʼ᾿4φ'4φʼ᾿4χ'4χʼ᾿4ψ'4ψʼ᾿_β4_γ4_δ4_ζ4_θ4_κ4_λ4_μ4_ν4_ξ4_π4_ρ4_σ4_ϲ4_τ4_φ4_χ4_ψ4",
4 : "ά3η_ά3η_ά3ι_ά3ι_οιό3η_ό3η_ό3ι_ό3ι_4γκ_4μπ_4ντ_4τζ_4τσ_4τϲ_4τς_4μπ'4μπʼ4μπ᾿4ντ'4ντ4ντ᾿4τζ'4τζʼ4τζ᾿σ'4τσʼσϲ'4τϲʼϲ᾿4β1β4γ1γ4δ1δ4ζ1ζ4θ1θ4κ1κ4λ1λ4μ1μ4ν1ν4π1π4ρ1ρ4σ1σ4ϲ1ϲ4τ1τ4φ1φ4χ1χ4ψ1ψ4β1ζ4β1θ4β1κ4β1μ4β1ν4β1ξ4β1π4β1σ4β1ϲ4β1τ4β1φ4β1χ4β1ψ4γ1β4γ1ζ4γ1θ4γ1μ4γ1ξ4γ1π4γ1σ4γ1ϲ4γ1τ4γ1φ4γ1χ4γ1ψ4δ1β4δ1γ4δ1ζ4δ1θ4δ1κ4δ1λ4δ1ξ4δ1π4δ1σ4δ1ϲ4δ1τ4δ1φ4δ1χ4δ1ψ4ζ1β4ζ1γ4ζ1δ4ζ1θ4ζ1κ4ζ1λ4ζ1μτζ2μ4ζ1ν4ζ1ξ4ζ1π4ζ1ρ4ζ1σ4ζ1ϲ4ζ1τ4ζ1φ4ζ1χ4ζ1ψ4θ1β4θ1γ4θ1δ4θ1ζ4θ1κ4θ1μσθ2μϲθ2μ4θ1ξ4θ1π4θ1σ4θ1ϲ4θ1τ4θ1φ4θ1χ4θ1ψ4κ1β4κ1γ4κ1δ4κ1ζ4κ1θ4κ1μ4κ1ξ4κ1π4κ1σ4κ1ϲ4κ1φ4κ1χ4κ1ψ4λ1β4λ1γ4λ1δ4λ1ζ4λ1θ4λ1κ4λ1μ4λ1ν4λ1ξ4λ1π4λ1ρ4λ1σ4λ1ϲ4λ1τ4λ1φ4λ1χ4λ1ψ4μ1β4μ1γ4μ1δ4μ1ζ4μ1θ4μ1κ4μ1λ4μ1ξ4μ1ρ4μ1σ4μ1ϲ4μ1τ4μ1φ4μ1χ4μ1ψ4ν1β4ν1γ4ν1δ4ν1ζ4ν1θ4ν1κ4ν1λ4ν1μ4ν1ξ4ν1π4ν1ρ4ν1σ4ν1ϲ4ν1φ4ν1χ4ν1ψ4ξ1β4ξ1γ4ξ1δ4ξ1ζ4ξ1θ4ξ1κ4ξ1λ4ξ1μ4ξ1ν4ξ1π4ξ1ρ4ξ1σ4ξ1ϲ4ξ1τ4ξ1φ4ξ1χ4ξ1ψ4π1β4π1γ4π1δ4π1ζ4π1θ4π1κ4π1μ4π1ξ4π1σ4π1ϲ4π1φ4π1χ4π1ψ4ρ1β4ρ1γ4ρ1δ4ρ1ζ4ρ1θ4ρ1κ4ρ1λ4ρ1μ4ρ1ν4ρ1ξ4ρ1π4ρ1σ4ρ1ϲ4ρ1τ4ρ1φ4ρ1χ4ρ1ψ4σ1δ4ϲ1δ4σ1ζ4ϲ1ζ4σ1ν4ϲ1ν4σ1ξ4ϲ1ξ4σ1ρ4ϲ1ρ4σ1ψ4ϲ1ψ4τ1β4τ1γ4τ1δ4τ1θ4τ1κ4τ1ν4τ1ξ4τ1π4τ1φστ2φϲτ2φ4τ1χ4τ1ψ4φ1β4φ1γ4φ1δ4φ1ζ4φ1κ4φ1μ4φ1ν4φ1ξ4φ1π4φ1σ4φ1ϲ4φ1χ4φ1ψ4χ1β4χ1γ4χ1δ4χ1ζ4χ1κ4χ1μ4χ1ξ4χ1π4χ1σ4χ1ϲ4χ1φ4χ1ψ4ψ1β4ψ1γ4ψ1δ4ψ1ζ4ψ1θ4ψ1κ4ψ1λ4ψ1μ4ψ1ν4ψ1ξ4ψ1π4ψ1ρ4ψ1σ4ψ1ϲ4ψ1τ4ψ1φ4ψ1χ4βρ_4γλ_4κλ_4κτ_6κς_6κϲ_4κσ_4λς_4λϲ_4λσ_4μς_4μϲ_4μσ_4νς_4νϲ_4νσ_4ρς_4ρϲ_4ρσ_4σκ_4ϲκ_4στ_4ϲτ_4τλ_4τρ_4φτ_",
5 : "ο3ϊ3όο3ϊ3ό4γ1κτ4μ1πτ4ν1τζ4νσ4νϲ4γκς_4γκϲ_4γκσ_4μπλ_4μπν_4μπρ_4ντς_4ντϲ_4ντσ_",
6 : "4ρ5γ2μ4ρ5θ2μ4λ5κ2μ4ρ5κ2μ4ν5κ2φ4γ5ξ2τ4ρ5ξ2τ4ρ5φ2ν4ρ5χ2μ4μ5ψ2τ4γ5κ2φ4γκ1ντ4γκ1τζ4γκ1τσ4γκ1τϲ4μπ1ντ4μπ1τζ4μπ1τσ4μπ1τϲ4ντ1μπ4τσ1γκ4τϲ1γκ4τσ1μπ4τϲ1μπ4τσ1ντ4τϲ1ντ",
10 : "4χτ_4γκ1μπ"
}
};

View File

@ -0,0 +1,26 @@
// Hyphenation patterns for Modern Polytonic Greek.
// Created by Dimitrios Filippou with some ideas borrowed from
// Yannis Haralambous, Kostis Dryllerakis and Claudio Beccari.
// From http://tug.org/svn/texhyphen/branches/ptex/hyph-utf8/tex/generic/hyph-utf8/patterns/tex/hyph-el-polyton.tex
// Converted by Pablo Rodríguez (hyphenator at pragmata dot tk)
Hyphenator.languages['el-polyton'] = {
leftmin : 2,
rightmin : 2,
shortestPattern : 1,
longestPattern : 11,
specialChars : "αεηιουωϊϋἀἁἂἃἄἅἆἇἐἑἒἓἔἕἠἡἢἣἤἥἦἧἰἱἲἳἴἵἶἷὀὁὂὃὄὅὐὑὒὓὔὕὖὗὠὡὢὣὤὥὦὧὰὲὴὶὸὺὼᾀᾁᾂᾃᾄᾅᾆᾇᾐᾑᾒᾓᾔᾕᾖᾗᾠᾡᾢᾣᾤᾥᾦᾧᾲᾳᾴᾶᾷῂῃῄῆῇῒῖῗῢῦῧῲῳῴῶῷάέήίόύώΐΰάέήίόύώΐΰβγκδζθλμπντξρσϲςφχψ'ʼ᾿’᾽ῤῥ",
patterns : {
2 : "α1ε1η1ι1ο1υ1ω1ϊ1ϋ1ἀ1ἁ1ἂ1ἃ1ἄ1ἅ1ἆ1ἇ1ἐ1ἑ1ἒ1ἓ1ἔ1ἕ1ἠ1ἡ1ἢ1ἣ1ἤ1ἥ1ἦ1ἧ1ἰ1ἱ1ἲ1ἳ1ἴ1ἵ1ἶ1ἷ1ὀ1ὁ1ὂ1ὃ1ὄ1ὅ1ὐ1ὑ1ὒ1ὓ1ὔ1ὕ1ὖ1ὗ1ὠ1ὡ1ὢ1ὣ1ὤ1ὥ1ὦ1ὧ1ὰ1ὲ1ὴ1ὶ1ὸ1ὺ1ὼ1ᾀ1ᾁ1ᾂ1ᾃ1ᾄ1ᾅ1ᾆ1ᾇ1ᾐ1ᾑ1ᾒ1ᾓ1ᾔ1ᾕ1ᾖ1ᾗ1ᾠ1ᾡ1ᾢ1ᾣ1ᾤ1ᾥ1ᾦ1ᾧ1ᾲ1ᾳ1ᾴ1ᾶ1ᾷ1ῂ1ῃ1ῄ1ῆ1ῇ1ῒ1ῖ1ῗ1ῢ1ῦ1ῧ1ῲ1ῳ1ῴ1ῶ1ῷ1ά1έ1ή1ί1ό1ύ1ώ1ΐ1ΰ1ά1έ1ή1ί1ό1ύ1ώ1ΐ1ΰ16'6ʼ6᾿",
3 : "α2ιαα2ία2ὶα2ῖα2ἰα2ἴα2ἲα2ἶα2ἱα2ἵα2ἳα2ἷά2ιά2ιά2ϊά2ϊα2υαα2ύα2ὺα2ῦα2ὐα2ὔα2ὒα2ὖα2ὑα2ὕα2ὓα2ὗά3υά3υε2ιε2ίε2ίε2ὶε2ῖε2ἰε2ἴε2ἲε2ἶε2ἱε2ἵε2ἳε2ἷέ2ιέ2ιέ2ϊέ2ϊε2υε2ύε2ύε2ὺε2ῦε2ὐε2ὔε2ὒε2ὖε2ὑε2ὕε2ὓε2ὗέ3υέ3υη2υη2ύη2ύη2ὺη2ῦη2ὐη2ὔη2ὒη2ὖη2ὑη2ὕη2ὓη2ὗο2ιοο2ίο2ὶο2ῖο2ἰο2ἴο2ἲο2ἶο2ἱο2ἵο2ἳο2ἷό2ιό2ιό2ϊό2ϊο2υοο2ύο2ὺο2ῦο2ὐο2ὔο2ὒο2ὖο2ὑο2ὕο2ὓο2ὗό3υό3υυ2ιυυ2ίυ2ὶυ2ῖυ2ἰυ2ἴυ2ἲυ2ἶυ2ἱυ2ἵυ2ἳυ2ἷααα2ϋά3ϋά3ϋε2ηέ2ηέ2ηε2ϊε2ϋό2ηό2ηο2ϊω2ιὠ2ιι2αιι2άι2ὰι2ᾶιιι2έι2ὲι2οιι2όι2ὸιιι2ώι2ὼι2ῶ_ί3_ί3_ῖ3_ἰ3_ἱ3η2αῃ2αη2άη2άη2ὰη2ᾶῃ2άῃ2άῃ2ὰῃ2ᾶη2εῃ2εη2έη2έη2ὲῃ2έῃ2έῃ2ὲη2οῃ2οη2όη2όη2ὸῃ2όῃ2όῃ2ὸη2ωῃ2ωη2ώη2ώη2ὼη2ῶῃ2ώῃ2ώῃ2ὼῃ2ῶ_ή3_ή3_ῆ3_ἠ3_ἡ3υ2αυυ2άυ2ὰυ2ᾶυυυ2έυ2ὲυ2ουυ2όυ2ὸυυυ2ώυ2ὼυ2ῶ_ύ3_ύ3_ῦ3_ὑ36β_6γ_6δ_6ζ_6θ_6κ_6λ_6μ_6ν_6ξ_6π_6ρ_6σ_6ϲ_6ς_6τ_6φ_6χ_6ψ_6β'6βʼ᾿6γ'6γʼ6γ᾿6δ'6δʼ᾿6ζ'6ζʼ᾿6θ'6θʼ᾿6κ'6κʼ᾿6λ'6λʼ᾿6μ'6μʼ᾿6ν'6νʼ6ν᾿6ξ'6ξʼ᾿6π'6πʼ᾿6ρ'6ρʼ6ρ᾿6σ'6σʼ6σ᾿6ϲ'6ϲʼ6ϲ᾿6τ'6τʼ᾿6φ'6φʼ᾿6χ'6χʼ᾿6ψ'6ψʼ᾿_β6_γ6_δ6_ζ6_θ6_κ6_λ6_μ6_ν6_ξ6_π6_ρ6_σ6_ϲ6_τ6_φ6_χ6_ψ6",
4 : "ά3η_ά3η_ά3ι_ά3ι_ά3ϊ_ά3ϊ_ό2ειό2ειό3η_ό3η_ό3ι_ό3ι_ό3ϊ_ό3ϊ_6γκ_6μπ_6ντ_6τζ_6τσ_6τϲ_6τς_6μπ'6μπʼ6μπ᾿6ντ'6ντ6ντ᾿6τζ'6τζʼ6τζ᾿σ'6τσʼσϲ'6τϲʼϲ᾿4β1β4γ1γ4δ1δ4ζ1ζ4θ1θ4κ1κ4λ1λ4μ1μ4ν1ν4π1π4ρ1ρ4ῤ1ῥ4σ1σ4ϲ1ϲ4τ1τ4φ1φ4χ1χ4ψ1ψ4β1ζ4β1θ4β1κ4β1μ4β1ν4β1ξ4β1π4β1σ4β1ϲ4β1τ4β1φ4β1χ4β1ψ4γ1β4γ1ζ4γ1θ4γ1κ4γ1μ4γ1ξ4γ1π4γ1σ4γ1ϲ4γ1τ4γ1φ4γ1χ4γ1ψ4δ1β4δ1γ4δ1ζ4δ1θ4δ1κ4δ1λ4δ1ξ4δ1π4δ1σ4δ1ϲ4δ1τ4δ1φ4δ1χ4δ1ψ4ζ1β4ζ1γ4ζ1δ4ζ1θ4ζ1κ4ζ1λ4ζ1μτζ2μ4ζ1ν4ζ1ξ4ζ1π4ζ1ρ4ζ1σ4ζ1ϲ4ζ1τ4ζ1φ4ζ1χ4ζ1ψ4θ1β4θ1γ4θ1δ4θ1ζ4θ1κ4θ1μσθ2μϲθ2μ4θ1ξ4θ1π4θ1σ4θ1ϲ4θ1τ4θ1φ4θ1χ4θ1ψ4κ1β4κ1γ4κ1δ4κ1ζ4κ1θ4κ1ξ4κ1π4κ1σ4κ1ϲ4κ1φ4κ1χ4κ1ψ4λ1β4λ1γ4λ1δ4λ1ζ4λ1θ4λ1κ4λ1μ4λ1ν4λ1ξ4λ1π4λ1ρ4λ1σ4λ1ϲ4λ1τ4λ1φ4λ1χ4λ1ψ4μ1β4μ1γ4μ1δ4μ1ζ4μ1θ4μ1κ4μ1λ4μ1ξ4μ1π4μ1ρ4μ1σ4μ1ϲ4μ1τ4μ1φ4μ1χ4μ1ψ4ν1β4ν1γ4ν1δ4ν1ζ4ν1θ4ν1κ4ν1λ4ν1μ4ν1ξ4ν1π4ν1ρ4ν1σ4ν1ϲ4ν1τ4ν1φ4ν1χ4ν1ψ4ξ1β4ξ1γ4ξ1δ4ξ1ζ4ξ1θ4ξ1κ4ξ1λ4ξ1μ4ξ1ν4ξ1π4ξ1ρ4ξ1σ4ξ1ϲ4ξ1τ4ξ1φ4ξ1χ4ξ1ψ4π1β4π1γ4π1δ4π1ζ4π1θ4π1κ4π1μ4π1ξ4π1σ4π1ϲ4π1φ4π1χ4π1ψ4ρ1β4ρ1γ4ρ1δ4ρ1ζ4ρ1θ4ρ1κ4ρ1λ4ρ1μ4ρ1ν4ρ1ξ4ρ1π4ρ1σ4ρ1ϲ4ρ1τ4ρ1φ4ρ1χ4ρ1ψ4σ1δ4ϲ1δ4σ1ζ4ϲ1ζ4σ1ν4ϲ1ν4σ1ξ4ϲ1ξ4σ1ρ4ϲ1ρ4σ1ψ4ϲ1ψ4τ1β4τ1γ4τ1δ4τ1θ4τ1ν4τ1ξ4τ1π4τ1φστ2φϲτ2φ4τ1χ4τ1ψ4φ1β4φ1γ4φ1δ4φ1ζ4φ1κ4φ1ν4φ1ξ4φ1π4φ1σ4φ1ϲ4φ1χ4φ1ψ4χ1β4χ1γ4χ1δ4χ1ζ4χ1κ4χ1μ4χ1ξ4χ1π4χ1σ4χ1ϲ4χ1φ4χ1ψ4ψ1β4ψ1γ4ψ1δ4ψ1ζ4ψ1θ4ψ1κ4ψ1λ4ψ1μ4ψ1ν4ψ1ξ4ψ1π4ψ1ρ4ψ1σ4ψ1ϲ4ψ1τ4ψ1φ4ψ1χβγσγϲγσμ2πϲμ2πμνσνϲν2τ6βρ_6γλ_6κλ_6κτ_6κς_6κϲ_6κσ_6λς_6λϲ_6λσ_6μς_6μϲ_6μσ_6νς_6νϲ_6νσ_6ρς_6ρϲ_6ρσ_6σκ_6ϲκ_6στ_6ϲτ_6τλ_6τρ_6φτ_6χτ_",
5 : "ο3ϊ3όο3ϊ3όο3ϊ3ὸβ5ν2τζ5ν2τλ5νρ5ν2τ",
6 : "4ρ5γ2μ4ρ5θ2μ4λ5κ2μ4ρ5κ2μ4γ5κ2φ4ν5κ2φ4γ5ξ2τ4ρ5ξ2τ4ρ5φ2ν4ρ5χ2μ4μ5ψ2τ4λ5γ2κ4ν5γ2κ4ρ5γ2κ4τ5γ2κ4ζ5μ2π4λ5μ2π4ν5μ2π4ρ5μ2πἄ5μ2ακἀ5μ2πρὄ5μ2ποὀ5μ2ποὀ5ν2τάὀ5ν2τάὀ5ν2τὰὀ5ν2τᾶ6μ2πλ_6μ2πν_6μ2πρ_",
7 : "ἰ5γου_ξε5γ2κ_ξέ5γ2κ_ξέ5γ2κ_σι5γ2κ_ϲι5γ2κἀ5μ2πάκἀ5μ2πάκἀ5μ2πανἀ5μ2πάρἀ5μ2πάρἀ5μ2πᾶρἀ5μ2παρρα5μ2πἰ5μ2πρα_κε5μ2π_λό5μ2π_λό5μ2π5μ2πέη_5μ2πέη_5μ2πεη_5μ2πογι_ξε5μ2π_ξέ5μ2π_ξέ5μ2π_ρε5μ2π_ρέ5μ2π_ρέ5μ2π_ρο5μ2πρό5μ2παρό5μ2παρό5μ2περό5μ2περό5μ2πωρό5μ2πωρο5μ2πῶρο5μ2παρο5μ2περο5μ2πωσό5μ2πασό5μ2παϲό5μ2παϲό5μ2πασό5μ2πεσό5μ2πεϲό5μ2πεϲό5μ2πεσο5μ2πῶϲο5μ2πῶσό5μ2πωσό5μ2πωϲό5μ2πωϲό5μ2πωσο5μ2παϲο5μ2πασο5μ2πεϲο5μ2πεσο5μ2πωϲο5μ2πω_τα5μ2π_χα5μ2π_χό5μ2π_χό5μ2π_ξε5ν2τ_ξέ5ν2τ_ξέ5ν2τ6γ2κ1τζ6γ2κ1τσ6γ2κ1τϲ6μ2π1τζ6μ2π1τσ6μ2π1τϲσ5γ2κ6τϲ5γ2κ6τσ5μ2π6τϲ5μ2π6τσ5ν2τ6τϲ5ν2τ",
8 : "ἐμι5γρ_μπα5γ2κ_μπι5γ2κ_σπά5γ2κ_σπά5γ2κ_ϲπά5γ2κ_ϲπά5γ2κ_σπα5γ2κ_ϲπα5γ2κ_φιό5γ2κ_φιό5γ2κ_φιο5γ2κἀ6μ3πάριἀ6μ3πάριἀ6μ3παρι_γά5μ2πι_γά5μ2πι_γα5μ2πι_ζεϊ5μ2π_κό5μ2πρ_κό5μ2προ5μ2πρι5μ2πρ5μ2πέης_5μ2πέης_5μ2πέηϲ_5μ2πέηϲ_5μ2πεης_5μ2πεηϲ_5μ2πέησ_5μ2πέησ_5μ2πεησ__μπι5μ2π_τρο6μ3π_τρό6μ3π_τρό6μ3π_ρου5μ2π_σέ5μ2πρ_σέ5μ2πρ_ϲέ5μ2πρ_ϲέ5μ2πρ_σνο5μ2π_ϲνο5μ2π_σού5μ2π_σού5μ2π_ϲού5μ2π_ϲού5μ2π_σου5μ2π_ϲου5μ2π_τζά5μ2π_τζά5μ2π_τζα5μ2π_τζι5μ2π_τό5μ2πρ_τό5μ2προ5μ2πρρα5μ2πἀ5ν2τάτζἀ5ν2τάτζ_βί5ν2τε_βί5ν2τε_βι5ν2τε_κα5νραϊ5ν2τ_μπε5ν2τ_μπι5ν2τ_ντα5ν2τ5ν2τίβαν5ν2τίβαν_ρε5ν2τί_ρε5ν2τί_ρε5νι_ροῦ5ν2τ_ρού5ν2τ_ρού5ν2τ_χα5νοα5νρ_χά5νρ_χά5νρ6γ2κ5μ2π6γ2κ5ν2τ6μ2π5ν2τ6ν2τ5μ2π",
9 : "5γραντ_ἴντρι5γ2κἰντρι5γ2κ_μα5γιό_μα5γιό_ντά5γ2κλ_ντά5γ2κλ_ντα5γ2κλἀλα5μ2πουρλού5μ2πἀρλού5μ2πἀρλοῦ5μ2πἀρλου5μ2π_βό5μ2πιρ_βό5μ2πιρο5μ2πιρα5μ2πάδ_κα5μ2πάδ_κα5μ2πίνα5μ2πίνα5μ2πῖνα5μ2πιν_κά5μ2ποτ_κά5μ2ποτ_κα5μ2πότ_κα5μ2πότ_κα5μ2ποτ_καου5μ2π_καρα5μ2π5μ2πα5μ2π5μ2πά5μ2π5μ2πά5μ2π5μ2πέ5μ2π5μ2πέ5μ2π5μ2πε5μ2π_νό5μ2πελ_νό5μ2πελ_νο5μ2πελ_ντό5μ2πρ_ντό5μ2πρ_ντο5μ2πρ_σα2μ5ποτ_ϲα2μ5ποτ_τε5μ2πεσ_τε5μ2πεϲ_τζου5μ2π_τσά5μ2πασά5μ2παϲά5μ2παϲά5μ2πασα5μ2παϲα5μ2παἀτρα5ν2τέἀτρα5ν2τέἀτρα5ν2τὲ_γιβε5ν2τ_γκάι5ν2τ_γκάι5ν2τ_γκάϊ5ν2τ_γκάϊ5ν2τ_γκαϊ5ν2τ_κα5ναΐ_κα5ναΐ_κα5ναϊ_μα5ν2τάμ_μα5ν2τάμ_μα5ν2τὰμ_μα5ναμ_μα5ν2τέμ_μα5ν2τέμ_μα5ν2τεμ_μεϊ5ν2τά_μεϊ5ν2τά_μεϊ5ναο5ν2τέλ_μο5ν2τέλ_μο5ν2τελμο5ν2τέρνμο5ν2τέρνμο5ν2τερν_νισα5ν2τ_νιϲα5ν2τ_ρεζε5ν2τ_σε5ν2τέφ_σε5ν2τέφ_ϲε5ν2τέφ_ϲε5ν2τέφ_σε5ν2τεφ_ϲε5ν2τεφ_σε5νοῦ_ϲε5νοῦ_σε5νού_σε5νού_ϲε5νού_ϲε5νού_σε5νου_ϲε5νουσα5ν2τί_τσα5ν2τί_τϲα5ν2τί_τϲα5ν2τί_τσα5νιϲα5νι",
10 : "_γιου5γοαρα5γιούλι5γ2κ_χούλι5γ2κ_χουλι5γ2κ_γιαρα5μ2π_καλα5μ2πααλί5μ2πραλί5μ2πραλι5μ2πρα5μ2παρέ_κα5μ2παρέ_κα5μ2παρὲ_κα5μ2παρε_καρνα5μ2π_κολι5μ2πρου5μ2πού_κου5μ2πού_κου5μ2ποῦ_κου5μ2που5μ2πέηδες_5μ2πέηδες_5μ2πέηδεϲ_5μ2πέηδεϲ_5μ2πέηδεσ_5μ2πέηδεσ_5μ2πέηδων_5μ2πέηδων__μπό5μ2πιρ_μπό5μ2πιρ_μπο5μ2πιρ_μπο5μ2πότ_μπο5μ2πότ_μπο5μ2ποτ_σκα5μ2παβ_ϲκα5μ2παβ_ταβλα5μ2π_τζανα5μ2π_τρα5μ2πάλ_τρα5μ2πάλ_τρα5μ2παλ_φά5μ2πρικ_φά5μ2πρικ_φα5μ2πρικ_μπαλά5ν2τ_μπαλά5ν2τ_μπαλα5ν2τ_μπα5ναν_μπου5να_μπου5νρ",
11 : "_καρα6μ3πόλ_καρα6μ3πόλ_καρα6μ3πολ_κολού5μ2προλού5μ2προλοῦ5μ2προλου5μ2προ6μ3πρέσσο6μ3πρσσο6μ3πρέϲϲο6μ3πρϲϲο6μ3πρεσσο6μ3πρεϲϲοντρα5μ2π_κωλού5μ2πρ_κωλού5μ2πρ_κωλοῦ5μ2πρ_κωλου5μ2πρανιτό5μ2π_μανιτό5μ2π_μπα6μ3πάκι_μπα6μ3πάκι_μπα6μ3πακι_ρεπού5μ2πλ_ρεπού5μ2πλ_ρεπου5μ2πλ_τα6μ3περαμ_τα6μ3πορλ_τα6μ3πούρλ_τα6μ3πορλ_τρα5μ2ποῦκ_τρα5μ2πούκ_τρα5μ2πούκ_τρα5μ2πουκ_τσι5μ2πούκ_τσι5μ2πούκ_τϲι5μ2πούκ_τϲι5μ2πούκ_τσι5μ2πουκ_τϲι5μ2πουκ_τσι5μ2πούσσι5μ2ποσϲι5μ2πούϲϲι5μ2ποϲσι5μ2πουσϲι5μ2πουϲ_γιαχου5ν2τ",
12 : "_σαλτιπά5γ2κ_σαλτιπά5γ2κ_ϲαλτιπά5γ2κ_ϲαλτιπά5γ2κ_κουλού5μ2προυλού5μ2προυλοῦ5μ2προυλου5μ2πρ_μπου5μ2πούν_μπου5μ2πον_μπου5μ2πον_μπου5μ2πουνοντρο5μ2παικβι5ναρ_ντερμπε5ν2τ_ντου5νούκ_ντου5νούκ_ντου5νοῦκ_ντου5νουκ_φαστφου5ν2τ_φαϲτφου5ν2τ",
13 : "_μπασκε2τ5μ2π_μπαϲκε2τ5μ2π_μπασι5μ2πουζ_μπαϲι5μ2πουζ"
}
};

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -1,17 +1,17 @@
Hyphenator.languages['fi'] = {
leftmin : 2,
rightmin : 2,
shortestPattern : 2,
longestPattern : 7,
specialChars : "öäå",
patterns : {
3 : "1ba1be1bi1bo1bu1by1da1de1di1do1du1dy1dä1dö1fa1fe1fi1fo1fu1fy1ga1ge1gi1go1gu1gy1gä1gö1ha1he1hi1ho1hu1hy1hä1hö1ja1je1ji1jo1ju1jy1jä1jö1ka1ke1ki1ko1ku1ky1kä1kö1la1le1li1lo1lu1ly1lä1lö1ma1me1mi1mo1mu1my1mä1mö1na1ne1ni1no1nu1ny1nä1nö1pa1pe1pi1po1pu1py1pä1pö1ra1re1ri1ro1ru1ry1rä1rö1sa1se1si1so1su1sy1sä1sö1ta1te1ti1to1tu1ty1tä1tö1va1ve1vi1vo1vu1vy1vä1vöä2yo1yö2ya1äa1öo1äo1öä2äö2öä2öö2ä_ä2u2sb2lb2rd2rf2lf2rg2lg2rk2lp2lp2rc2lq2v",
4 : "y1a2y1o2u1y2y1u2ö3a2ö3o2ä3a2ä3o2ä1u2ö1u2u1ä2u1ö2e1aai1aao1aau1aau1eea1uui1uue1uuo1uuää1iää1eää3yi1ääe1ääy1ääi1ööa1eia1oie1aii1auy1eiai1aai1eai1oai1uau1aau1eeu1aie1aie1oie1yiu1aiu1eiu1ooi1aoi1eoi1ooi1uo1uiou1eou1oue1aui1euo1auo1ue1ö2ö1e2r2asl2as1k2vsc2hts2h",
5 : "1st2raa1i2aa1e2aa1o2aa1u2ee1a2ee1i2ee1u2ee1y2ii1a2ii1e2ii1o2uu1a2uu1e2uu1o2uu1i2io1a2io1e2keus11b2lo1b2ri1b2ro1b2ru1d2ra1f2la1f2ra1f2re1g2lo1g2ra1k2ra1k2re1k2ri1k2va1p2ro1q2vich2r",
6 : "1sp2lialous1rtaus1perus12s1ase2s1apuulo2s1bib3li",
7 : "yli1o2pali1a2v2s1ohje1a2sian1a2siat1a2sioi2s1o2sa2n1o2sa_ydi2n12n1otto2n1oton2n1anto2n1anno2n1aika2n1a2jo2s1a2jo",
8 : "2s1a2sia2n1o2pet2s1a2loialkei2s12n1e2dus2s1ajatu2s1y2rit2s1y2hti2n1a2jan2n1o2mai2n1y2lit2s1a2len2n1a2len",
9 : "2s1o2pisk2n1o2pist2s1o2pist2s1i2dea_2s1i2dean2s1e2sity_suu2r1a2",
11 : "1a2siaka2s1"
}
};

View File

@ -1,27 +1,26 @@
// The french hyphenation patterns are retrieved from
// http://tug.org/svn/texhyphen/trunk/collaboration/repository/hyphenator/
Hyphenator.languages['fr'] = {
leftmin : 2,
rightmin : 3,
shortestPattern : 1,
longestPattern : 14,
specialChars : "àâçèéêîïôûœ’'",
patterns : {
2 : "1j1q",
3 : "'a4'e4'i4'o4'u4'y4'â4'è4'é4'ê4'î4'ô4'û4_a4_e4_i4_o4_u4_y4_â4_è4_é4_ê4_î4_ô4_û41ba1be1bi1bo1bu1by1bâ1bè1bé1bê1bî1bô1bû1ca1ce1ci1co1cu1cy1câ1cè1cé1cê1cî1cô1cû1da1de1di1do1du1dy1dâ1dè1dé1dê1dî1dô1dû1fa1fe1fi1fo1fu1fy1fâ1fè1fé1fê1fî1fô1fû1ga1ge1gi1go1gu1gy1gâ1gè1gé1gê1gî1gô1gû1ha1he1hi1ho1hu1hy1hâ1hè1hé1hê1hî1hô1hû1ka1ke1ki1ko1ku1ky1kâ1kè1ké1kê1kî1kô1kû1la1le1li1lo1lu1ly1là1lâ1lè1lé1lê1lî1lô1lû1ma1me1mi1mo1mu1my1mâ1mè1mé1mê1mî1mô1mû1na1ne1ni1no1nu1ny1nâ1nè1né1nê1nî1nô1nû1pa1pe1pi1po1pu1py1pâ1pè1pé1pê1pî1pô1pû1ra1re1ri1ro1ru1ry1râ1rè1ré1rê1rî1rô1rû1sa1se1si1so1su1sy1sâ1sè1sé1sê1sî1sô1sû1ta1te1ti1to1tu1ty1tà1tâ1tè1té1tê1tî1tô1tû1va1ve1vi1vo1vu1vy1vâ1vè1vé1vê1vî1vô1vû1wa1we1wi1wo1wu1za1ze1zi1zo1zu1zy1zè1zé2'22jkn1xé1q",
4 : "_1ba_1bi_1ci_1co_1cu_1da_1di_1do_1dy_1dé_1ge_1la_1ma_1mi_1mo_1mé_1no_1pa_1pe_1po_1pu_1pé_1re_1ré_1sa_1se_1so_1su_1sy_1ta1b2l1b2r1c2h1c2k1c2l1c2r1c½01d2r1f2l1f2r1g2l1g2n1g2r1k2h1k2r1m½01n½01p2h1p2l1p2r1r2h1s2h1s½01t2h1t2r1v2r1w2r2chb2chg2chm2chn2chp2chs2cht2chw2ckb2ckf2ckg2ckp2cks2ckt2phn2phs2pht2shm2shr2shs2thl2thm2thn2ths4be_4ce_4ch_4ck_4de_4fe_4ge_4he_4je_4ke_4kh_4le_4me_4ne_4pe_4ph_4re_4se_4sh_4te_4th_4ve_4we_4ze_a1bîa1laa1maa1nea1nia1poa1viab2hac1qad2har1cb1leb1reb1ruc1cic1kec1lac1lec1rec2hac2hec2hic2hoc2huc2hyc2hâc2hèc2héc2hêc2hîc2hôc2hûch2lch2rd1had1hod1led1red1s2e1nif1laf1lef1ref1rif1s2g1leg1neg1rag1reg1s2i1vail2ll1lil1lul1mem1nèm1ném1s2n1sao1pup1hep1hop1lep1lup1nep1rep1rip1rop1rup1rép1syp1tèp1téph2lph2rr1cir1her1hyr1mis1cas1cos1hes1hos1las1los1pas1pes1pis1pos1tas1tes1tis1tos1tys1tét1het1rat1ret1rit1ruth2ru1ciu1niu1viv1reâ1meè1meé1ceé1cié1cué1deé1leé1lié1loé1léé1mié1neé1nié1pié1reô1me",
5 : "'a1mi'a1na'a1po'o1vi_1p2l_1p2r_1t2r_a1mi_a1na_a1po_c2hè_con4_cul4_dé2s_o1vi_p1ha_p1lu_p1ro_p1ré_p1sy_pe4r_réu2_s1ta_s1ti_t1ri_é1mi1d2'21g2ha1g2he1g2hi1g2ho1g2hy1p2né4bes_4ces_4des_4fes_4ges_4gue_4hes_4jes_4kes_4les_4mes_4nes_4pes_4que_4res_4ses_4tes_4ves_4wes_4zes_ab1seac1ceai1meal1coan1tiap1paar1mear1mias1meau1meca1pica1rêch1lech1loch1rech1rocil3lco1apco1arco1auco1axco1efco1enco1exco1nuco1é2cy1rid1d2hda1medi1lidé1caer1mees1cees1coes1tifa1mefu1mefé1cugil3lhil3lhu1mehy1pehy1pohé1mihé1moi1b2ri1oxyib1riim1maim1miim1poim1puin1ciin1diin1doin1duin1foin1noin1soin1tein1tiis1ceis1taja1cel1s2tlil3lmi1memil3lmo1nomu1nimé1coo1b2lo1d2lo1g2nob1looc1teog1noom1buom1meom1nior1meos1taos1tios1toos1tépa1lépa1piph1leph1reph1taph1tipi1ripo1lypu1pipu1sipé1nupé1réra1dira1mere1lere1lire1peri1meru1leré1geré1maré1suré1tis1c2ls1p2hs1t2rsc1lésc2hese1mises1qsp1hèsp1hést1rost1rusu1mesu1pesu1rasu1rét1c2ht1t2lta1metc2hith1reth1rito1metu1meté1léue1viva1civa1nive1nivi1divil3lvé1loxil3lys1toé1d2réd1riéd2hiélo1q",
6 : "'a1b2r'a1g2n'ab1ré'ag1na'an1ti'ar1ge'ar1pe'as2ta'i1g2n'in1te'in2er'on1gu_1c2h4_1k2h4_1p2h4_1s2h4_1t2h4_a1b2r_a1g2n_ab1ré_ag1na_an1ti_ar1de_ar1ge_ar1pe_as2ta_bi1au_bi1u2_ci1sa_co1o2_cons4_do1le_dy2s3_dé1a2_dé1io_dé1o2_dé1sa_dé1se_dé1so_dé1su_i1g2n_in1te_in2er_la1te_ma1la_ma1le_ma1li_ma1lo_mil3l_mo1no_mé1go_mé1se_mé1su_mé1ta_mé2sa_no1no_on1gu_pa1na_pa1ni_pa1no_pa1ra_pa1re_pa1te_pé1ri_re1s2_res1q_ré1a2_ré1e2_ré1i2_ré1o2_ré1é2_ré2el_ré2er_ré2èr_su1bi_su1bu_su1ri_su1ro_ta1le1m2nès1octet1p2neu1p2tèr1p2tér1s2c2h1s2cop1s2lav1s2lov1s2por2bent_2c1k3h2cent_2dent_2fent_2gent_2jent_2kent_2lent_2nent_2pent_2rent_2s2chs2s3hom2sent_2tent_2vent_2went_2xent_2zent_3d2hal4b4le_4b4re_4c4he_4c4ke_4c4le_4c4re_4d4re_4f4le_4f4re_4g4le_4g4ne_4g4re_4gues_4p4he_4p4le_4p4re_4ques_4r4he_4s4ch_4s4he_4t4he_4t4re_4v4re_abs1tiabî1meani1mear1c2harc2hias1t2ravil4laè1d2rbou1mebou1tibru1mecci1deche1vicla1meco1a2dco1accco1g2ncog1nicom1pécon1ficon1nicon1ticor1pucur1redis1codis1sidis1tidé1t2rdét1rien1t2rent1reeus1taex1t2rfi1c2hfic2hufir1mefri1tihémi1éins1tiisc2hiiva1leiè1d2rl3lionla1w2rllu1memil1lemit1tenu1t2rnut1riopu1leos1t2rost1raost1rioxy1a2oè1d2rpe1r3hper1maper1tipho1toplu1mepri1vapru1depré1sepu1g2npug1napé2nulqua1merai1mercil4lrin1germil4lry1t2hry2thmser1geser1pesla1lospa1tispi1rospo1rusto1mosté1résu1b2lsub1lisub1s2suc1cuta1c2htac2hytan1getem1péter1getes1tator1retri1detru1cutur1buucil4luvil4lvol1tawa2g3néci1meécu1meédri1qéli1meélé1meémil4léni1te",
7 : "'ab3réa'ami1no'e1n1a2'e1n1o2'i1n1a2'i1n1e2'i1n1i2'i1n1o2'i1n1u2'i1n1é2'i2g3ni'i2g3né'i2g4no'in2ept'in2i3q'in2i3t'in2ond'in2u3l'in2uit'ina1ni'ini1mi'ino1cu'ins1ta'iné1lu'iné1na'oua1ou_ab3réa_ami1no_bai1se_bi1a2c_bi1a2t_bio1a2_com1me_coo1li_da1c2r_dac1ry_di1ald_di1e2n_di2s3h_dia1ci_dia1to_dé2s1½_dé3s2c_dé3s2p_dé3s2t_e1n1a2_e1n1o2_gem1me_i1n1a2_i1n1e2_i1n1i2_i1n1o2_i1n1u2_i1n1é2_i2g3ni_i2g3né_i2g4no_in2ept_in2i3q_in2i3t_in2ond_in2u3l_in2uit_ina1ni_ini1mi_ino1cu_ins1ta_iné1lu_iné1na_ma1c2r_ma1g2n_ma2c3k_ma2r1x_mac1ro_mag1ni_mag1nu_mil1li_mé2s1i_mé3san_oua1ou_pa1r2h_pen2ta_pha1la_plu1ri_pon1te_pos1ti_pro1é2_pré1a2_pré1e2_pré1i2_pré1o2_pré1s2_pré1u2_pré1é2_pré2au_re2s3s_re2s3t_res1ca_res1ci_res1co_res1pe_res1pi_res1po_res1se_res1ta_res1ti_res1to_res1té_ré1t2r_ré2aux_ré2uss_réa1li_rét1ro_sar1me_ser1me_seu2le_sou1ve_stil3l_su1b2l_su2r3h_sub1li_émi1ne1alcool1s2clér1s2perm1s2phèr1s2phér1s2piel1s2tein1s2tigm1é2drie1é2nerg2chent_2guent_2phent_2quent_2r3heur2shent_2t3heur3d2houd3ph2tis4b4les_4b4res_4c4hes_4c4kes_4c4les_4c4res_4ch4le_4ch4re_4d4res_4f4les_4f4res_4g4les_4g4nes_4g4res_4p4hes_4p4les_4p4res_4ph4le_4ph4re_4r4hes_4s4hes_4t4hes_4t4res_4th4re_4v4res_amal1gaanti1feappa1rearmil5lcapil3lcarê1mechlo1rachlo1réchro1meco1ac1qco2nurbcoas1socoas1sucyril3ldia1p2hdiaph2rdili1gedéca1dee2s3c2he2s3copesti1meext1ra1extra2cextra2iflam1mefécu1legram1megran1dihype4r1hypers2hypo1a2hypo1e2hypo1i2hypo1o2hypo1s2hypo1u2hypo1é2i1al1gii1s2c2hi1s2tatiar1t2hibril3limma1neimmi1neimpo1teimpu1deinci1deindi1geindo1leinno1ceinso1leinti1meio1a2ctl2ment_la2w3remil4letmon1t2rmono1vamont1rémoye1nâmuni1fin3s2at_o1io1nio1s2taso1s2tato1s2timo1s2tomogno1moomni1poomni1s2papil2lpiril3lpoly1a2poly1e2poly1i2poly1o2poly1s2poly1u2poly1vapoly1è2poly1é2pros1taproé1mipréé1mipupil3lpusil3lreli1meryth1meréma1neréti1cesemil4lstan1dasu3r2ahsupe4r1supers2suré1mither1mothril3ltung2s3télé1e2télé1i2télé1s2u2s3t2ruevil4luni1a2xuni1o2vvacil4lvanil2lven1t2rveni1mevent1rividi1mey1al1giy1s2tomâ2ment_è2ment_é3cent_é3dent_é3rent_épis1coéqui1poéqui1vaô2ment_",
8 : "'a2g3nat'anti1a2'anti1e2'anti1s2'anti1é2'eu2r1a2'inau1gu'inef1fa'inte4r3'inters2'ovi1s2c_1s2c2h4_a2g3nat_anti1a2_anti1e2_anti1s2_anti1é2_bi2s1a2_chè1v2r_chèv1re_con1t2r_cont1re_di1a2cé_di1a2mi_dy2s1a2_dy2s1i2_dy2s1o2_dy2s1u2_dé2s1i2_dé2s1é2_dés2a3m_désa1te_dési1ne_déso1pi_eu2r1a2_inau1gu_inef1fa_inte4r3_inters2_ma2l1ap_ma2l1en_ma2l1oc_mono1a2_mono1e2_mono1i2_mono1o2_mono1s2_mono1u2_mono1é2_mé2g1oh_mé2s1es_ovi1s2c_pa2n1is_pa2r3hé_para1s2_pe1r1a2_pe1r1e2_pe1r1i2_pe1r1o2_pe1r1u2_pe1r1é2_pluri1a_pon2tet_pos2t3h_pos2t3r_post1s2_pro1g2n_prog1na_psyc2ho_pud1d2l_péri1os_péri1s2_péri1u2_re3s4tu_re3s4ty_res1c2r_res1p2l_resp1le_rest1re_rest1ri_ré2a3le_ré2i3fi_sta2g3n_su2b1a2_su2b1in_su2b1ur_su2b1é2_su2b3lu_su2r1a2_su2r1e2_su2r1of_su2r1ox_su2r1é2_su3r2et_syn1g2n_syng1na_tri1a2c_tri1a2n_tri1a2t_tri1o2n1m2né1mo1m2né1si1s2patia1s2piros1s2tomos1s2ty1le2b2lent_2b2rent_2c2kent_2c2lent_2c2rent_2d2lent_2d2rent_2f2lent_2f2rent_2g2lent_2g2nent_2g2rent_2p2lent_2p2rent_2t2rent_2v2rent_4ch4les_4ch4res_4ph4les_4ph4res_4s4c4he_4th4res_a1è2d1rea2s3t1roab3sent_absti1neac3cent_ai2ment_anes1t2hanest1héar2ment_as2ment_au2ment_boutil3lca3ou3t2chevil4lchien1deco1assocco1assurcompé1teconfi1deconni1veconti1necorpu1leda2ment_di2s3copdiaph1radissi1dedistil3ldétri1meentre1geer2ment_es3cent_eu1s2tatfa2ment_fichu1mefritil3lfu2ment_hu2ment_hype1ra2hype1re2hype1ri2hype1ro2hype1ru2hype1ré2hémo1p2ti1arth2ri1è2d1rei2s3c2héi2s3chiai2s3chioimmis1ceindul1geinfor1mainstil3lintel1liis3cent_ja3cent_mi2ment_mécon1ten3s2ats_nutri1meo1s2trado1è2d1reo2b3longom2ment_ombud2s3or2ment_paléo1é2papil1lopapil3lapapil3lepapil3liperma1neperti1nephoto1s2poas1t2rpu2g3nacpé1r2é2qra2ment_radio1a2re3lent_re3pent_ri2ment_ru3lent_ré3gent_résur1geslalo1mesporu1lesu2ment_subli1mesuccu1lesupe1ro2ta2ment_tachy1a2tchin3t2tempé1ratesta1meto2ment_tran2s3htran2s3ptrans1patrucu1letu2ment_turbu1letélé1o2btélé1o2pvanil1liy1as1t2hé3quent_",
9 : "'ae3s4c2h'apo2s3ta'ar3gent_'ar3pent_'in1s2tab'in2a3nit'in2augur'in2effab'in2o3cul'inte1ra2'inte1re2'inte1ri2'inte1ro2'inte1ru2'inte1ré2_ae3s4c2h_apo2s3ta_ar3dent__ar3gent__ar3pent__baise1ma_ci2s1alp_co2o3lie_di1a2cid_di1a2tom_do3lent__dé2s1u2n_dé3s2ert_dé3s2exu_dé3s2i3d_dé3s2i3r_dé3s2ist_dé3s2o3l_dé3s2orm_dé3s2orp_désen1si_in1s2tab_in2a3nit_in2augur_in2effab_in2o3cul_inte1ra2_inte1re2_inte1ri2_inte1ro2_inte1ru2_inte1ré2_la3tent__ma2g3num_ma2l1a2v_ma2l1int_ma2l1o2d_magni1ci_magni1fi_mala1d2r_malad1re_milli1am_mé2s1u2s_no2n1obs_pa2n1a2f_pa2n1opt_pa3rent__pa3tent__para1c2h_pos2t1in_pos2t1o2_pro1s2cé_prou3d2h_pré2a3la_péri2s3s_re2s3cap_re2s3cou_re2s3pir_re3s4t2r_re3s4tab_re3s4tag_re3s4tat_re3s4tim_re3s4tip_re3s4toc_re3s4top_re3s4tén_re3s4tér_ré2a3lis_ré2a3lit_rétro1a2_su2b3lin_su2r1i2m_su2r1inf_su2r1int_su3b2alt_su3b2é3r_su3r2a3t_su3r2eau_su3r2ell_subli1mi_ta3lent_1informat1p2sy1c2h1s2ca1p2h1s2to1c2k1é2drique1é2lément2r3hy1d2r3ph2ta1lé4s4c4hes_a2l1al1giabî2ment_amalga1meani2ment_apo2s3t2rarchié1pibou2ment_bru2ment_cci3dent_cla2ment_contin1gecur3rent_e2n1i2v2rfir2ment_grandi1loiva3lent_llu2ment_mit3tent_monova1lemunifi1ceo1s2té1roo2g3no1siomnipo1teopu3lent_ostric1tipapil3lomplu2ment_po1ast1repolyva1leprivat1dopro2s3tatproémi1nepru3dent_pré3sent_préémi1nepugna1b2lqua2ment_rai3ment_rin3gent_ser3gent_ser3pent_sesqui1a2stéréo1s2surémi1netan3gent_ter3gent_thermo1s2tor3rent_tran2s1a2tran2s1o2tran2s1u2tri3dent_vanil3linvanil3lisventri1povol2t1ampvélo1s2kiéci2ment_écu2ment_éli2ment_éni3tent_épi2s3copéquipo1teéquiva1le",
10 : "'amino1a2c'ana3s4t2r'in2exo1ra'on3guent__1p2sy1c2h_amino1a2c_ana3s4t2r_chèvre1fe_com3ment__contre1ma_dacryo1a2_dé3s2i3li_gem2ment__in2exo1ra_macro1s2c_mono1ï2dé_on3guent__pa2n1a2mé_pa2n1a2ra_péri2s3ta_re2s3c1ri_re2s3pect_re2s3pons_re2s3quil_re3s4tand_re4s5trin_res3sent__sar3ment__ser3ment__sou3vent__émi3nent_1s2tandard1s2tro1p2h1s2truc1tu1é2lec1t2racquies1ceantifer1meappa3rent_carê2ment_chlo2r3a2cchlo2r3é2tchro2ment_co2g3ni1tidili3gent_déca3dent_esti2ment_flam2ment_fécu3lent_gram2ment_grandilo1qimma3nent_immi3nent_impo3tent_impu3dent_inci3dent_indi3gent_indo3lent_inno3cent_inso3lent_intelli1geinti2ment_mon2t3réalmoye2n1â2go1s2tra1tureli2ment_ryth2ment_réma3nent_réti3cent_tempéra1metran3s2acttran3s2atstranspa1reveni2ment_vidi2ment_élo3quent_",
11 : "'anti2en1ne'in2i3mi1ti_1dé3s2o3dé_anti2en1ne_contre1s2c_dé3s2a3c2r_dé3s2ensib_dé3s2i3g2n_dé3s2i3nen_dé3s2in1vo_dé3s2o3pil_dé3s2é3g2r_in2i3mi1ti_ma2l1ai1sé_magnifi1ca_mé1ta1s2ta_pa2n1o2p2h_phalan3s2t_psycho1a2n_re2s3ci1si_re2s3ci1so_re2s3plend_re4s5trein_re4s5trict_su2b3liminabsti3nent_archi1é2pischien3dent_compé3tent_confi3dent_conni3vent_conti3nent_corpu3lent_diaphrag1medissi3dent_détri3ment_entre3gent_fichu3ment_immis4cent_indul3gent_mécon3tent_nutri3ment_o2g3nomo1niperma3nent_perti3nent_privatdo1ceprivatdo1zepu2g3nab1lerésur3gent_slalo2ment_sporu4lent_subli2ment_succu3lent_testa3ment_trucu3lent_turbu3lent_ventripo1teépi3s4co1pe",
12 : "'in2é3luc1ta'in2é3nar1ra_bai2se3main_dé3s2a3tell_dé3s2as1t2r_dé3s2ou1f2r_in2é3luc1ta_in2é3nar1ra_ma2l1a2d1ro_ma2l1a2dres_pa2r1a2c2he_pa2r1a2c2hè1a2nesthé1siamalga2ment_contin3gent_monova3lent_munifi3cent_o1s2trictionomnipo3tent_polyva3lent_proémi3nent_préémi3nent_surémi3nent_équipo3tent_équiva4lent_",
13 : "_ma2g3nici1de_ma2g3nificat_pro2g3na1t2h_syn2g3na1t2hacquies4cent_antifer3ment_intelli3gent_tempéra3ment_transpa3rent_",
14 : "_chèvrefeuil2l_contremaî1t2rdiaphrag2ment_privatdo3cent_privatdo3zent_ventripo3tent_",
2 : "1ç1j1q",
3 : "1gèâ41zu1zo1zi1zè1zé1ze1zay4_y41wu1wo1wi1we1wa1vy1vû1vu1vô1vo1vî1vi1vê1vè1vé1ve1vâ1vaû4_û4u4_u41ba1bâ1ty1be1bé1bè1bê1tû1tu1tô1bi1bî1to1tî1ti1tê1tè1té1te1tà1tâ1ta1bo1bô1sy1sû1su1sœ1bu1bû1by221ca1câ1sô1ce1cé1cè1cê1so1sî1si1sê1sè1sé1se1sâ1sa1ry1rû1ru1rô1ro1rî1ri1rê1rè1ré1re1râ1raa41py1pû1pu1pô1po1pî1pi1pê1pè1pé1pe1pâ1pa_ô41ci1cîô4o4_o41nyn1x1nû1nu1nœ1nô1no1nî1ni1nê1nè1né1ne1nâ1co1cô1na1my1mû1mu1mœ1mô1mo1mî1mi1cœ1mê1mè1mé1me1mâ1ma1ly1lû1lu1lô1lo1lî1li1lê1lè1cu1cû1cy1lé1d1da1dâ1le1là1de1dé1dè1dê1lâ1la1ky1kû1ku1kô1ko1kî1ki1kê1kè1ké1ke1kâ1ka2jk_a4î4_î4i4_i41hy1hû1hu1hô1ho1hî1hi1hê1hè1hé1he1hâ1ha1gy1gû1gu1gô1go1gî1gi1gê_â41gé1ge1gâ1ga1fy1di1dî1fû1fu1fô1foe41fî1fi1fê1fè1do1dô1fé1fe1fâ1faè41du1dû1dy_è4é4_é4ê4_ê4_e41zy",
4 : "1f2lab2h2ckg2ckp2cksd1s22ckb4ck_1c2k2chw4ze_4ne_2ckt1c2lad2hm1s22cht2chsch2r2chp4pe_1t2r1p2h_ph44ph_ph2l2phnph2r2phs1d2r2pht2chn4fe_2chm1p2l1p2r4me_1w2rch2l2chg1c2r2chb4ch_1f2r4le_4re_4de_f1s21k2r4we_1r2h_kh44kh_1k2h4ke_1c2h_ch44ge_4je_4se_1v2r_sh41s2h4ve_4sh_2shm2shr2shs4ce_il2l1b2r4be_1b2l4he_4te__th41t2h4th_g1s21g2r2thl1g2l2thm2thnth2r1g2n2ths2ckf",
5 : "2ck3h4rhe_4kes_4wes_4res_4cke_éd2hi4vre_4jes_4tre_4zes_4ges_4des_i1oxy4gle_d1d2h_cul44gne_4fre_o1d2l_sch44nes_4les_4gre_1s2ch_réu24sch_4the_1g2hy4gue_2schs4cle_1g2ho1g2hi1g2he4ses_4tes_1g2ha4ves_4she_4che_4cre_4ces_t1t2l4hes_l1s2t4bes_4ble__con4xil3lco1ap4que_vil3l4fle_co1arco1exco1enco1auco1axco1ef4pes_co1é2per3h4mes__pe4r4bre_4pre_4phe_1p2né4ple__dé2smil3llil3lhil3l4dre_cil3lgil3l4fes_",
6 : "in1o2rcil4l4phre_4dres_l3lioni1algi2fent_émil4l4phle_rmil4l4ples_4phes_1p2neuextra14pres_y1asthpé2nul2xent__mé2sa2pent_y1algi4chre_1m2nès4bres_1p2tèr1p2tér4chle_en1o24fles_oxy1a2avil4l_en1o24ques_uvil4lco1a2d4bles__in1a2in1a21s2por_cons4_bi1u2as2ta_in1e2in1e2_in1é2in1é21s2lov1s2lavco1acq2cent__as2ta_co1o24ches_hémi1é_in2erin2er2s3homo1ioni_in1i2in1i22went_4shes__ré1a2_ré1é2_ré1e2_ré2el_in1o2ucil4lco1accu2s3tr_ré2er_ré2èr4cles_2vent__ré1i22sent_2tent_2gent__ré1o24gues__re1s24sche_4thes_en1a2e2s3ch4gres_1s2cop2lent__en1a22nent__in1u2in1u24gnes_4cres_wa2g3n4fres_4tres_4gles_1octet_dé1o2_dé1io4thre__bi1au2jent__dé1a22zent_4vres_2dent_4ckes_4rhes__dy2s3sub1s22kent_2rent_2bent_3d2hal",
7 : "a2g3nos3d2houdé3rent__dé3s2t_dé3s2pé3dent_2r3heur2r3hydri1s2tat2frent_io1a2ctla2w3rein2u3l_in2u3l2crent_in2uit_in2uit1s2caph1s2clér_ré2ussi2s3ché_re2s3t_re2s3s4sches_é3cent__seu2lein2ond_in2ondin2i3t_in2i3tin2i3q_ré2aux_in2i3q2shent__di1alduni1a2xin2ept2flent__in2eptuni1o2v2brent_co2nurb2chent_2quent_1s2perm1s2phèr_ma2c3kuevil4l1s2phér1s2piel1s2tein1s2tigm4chles_1s2tock1s2tyle1p2sych_pro1é2_ma2r1x_stil3lpusil3libril3lcyril3l_pré1s2thril3l_mé3san_pré1u2_mé2s1i_pré1o2_pré1i2piril3lpupil3lâ2ment__pré1e2_pré1é2_pré2au_pré1a22prent_2vrent_supero2_di1e2npoly1u2è2ment_poly1s2poly1o2poly1i2poly1è2poly1é2poly1e2poly1a2supe4r1capil3l2plent_armil5lsemil4lmil4letvacil4l_di2s3h3ph2tis2dlent_a2s3tro4phres_l2ment_i1è2drei1arthr2drent_4phles_supers2ô2ment_extra2i2phent_su3r2ah_su2r3hextra2chypo1u21alcool_per1u2_per1o2_per1i2_per1é2hypo1s2_per1a2hypo1o2hypo1i2hypo1é2_pen2tahypo1e2hypo1a2y1s2tome2s3cophyperu2hype4r1hypers2hypero21m2némohyperi21m2nési4chres_a1è2drehyperé2hypere2hypera2oua1ou_oua1ouo1s2tomo1s2timo1s2tato1s2tasomni1s2tung2s3_dé3s2c2blent__bio1a2télé1e2télé1i22clent_télé1s22guent_1é2nerg2grent_2trent__dé2s1œ2t3heuro1è2dre2gnent_2glent_4thres__bi1a2t1é2drie_bi1a2c_i2g3nin3s2at_i2g3ni2ckent__i2g3néab3réai2g3né_ab3réa_per1e2",
8 : "_ma2l1ap_dy2s1u2_dy2s1o2_dy2s1i2n3s2ats__dy2s1a2distil3l1é2lectrinstil3l1s2trophe2n1i2vro2b3long1s2tomos_ae3s4chae3s4ch_eu2r1a2ombud2s3eu2r1a2_mono1s2_mono1u2o1s2téro_mono1o2eu1s2tato1s2tradfritil3la2l1algi_mono1i2_mono1é2_ovi1s2covi1s2c_mono1e2_mono1a2co1assocpaléo1é2boutil3l1s2piros_ré2i3fi_pa2n1ischevil4l1s2patiaca3ou3t2_di1a2cé_para1s2_pa2r3héco1assur_su2b1é2tu2ment_su2ment__su2b1in_su2b3lupapil3lire3pent_inte4r3_su2b1urab3sent__su2b1a2di2s3cophu2ment_fu2ment__intera2au2ment_as2ment_or2ment_intera2_intere2pé1r2é2q_péri1os_péri1s2ja3cent__anti1a2_péri1u2anti1a2er2ment__anti1e2ac3cent_ar2ment_to2ment_intere2ré3gent_papil3leom2ment_anti1e2photo1s2_anti1é2_interé2anti1é2_anti1s2anti1s23ph2taléinteré2ri2ment__interi2interi2mi2ment_apo2s3tri2s3chio_pluri1ai2s3chia_intero2intero2_inte4r3po1astre_interu2interu2_inters2ai2ment_inters2papil3la_tri1o2n_su2r1a2_pon2tet_pos2t3h_dés2a3mes3cent__pos2t3r_post1s2_tri1a2tta2ment__tri1a2nra2ment_is3cent__su2r1e2_tri1a2cfa2ment_da2ment__su3r2et_su2r1é2_mé2s1es_mé2g1oh_su2r1of_su2r1ox_re3s4ty_re3s4tu_ma2l1oca2g3nat_dé2s1é2_ma2l1entachy1a2_pud1d2ltchin3t2_re3s4trtran2s3p_bi2s1a2tran2s3hhémo1p2té3quent__a2g3nat_dé2s1i2télé1o2bo2g3nosiradio1a2télé1o2ppu2g3nacru3lent__sta2g3nre3lent__ré2a3le_di1a2mi",
9 : "_ré2a3lit_dé3s2o3lthermo1s2_dé3s2ist_dé3s2i3rmit3tent_éni3tent__do3lent__ré2a3lisopu3lent__pa3tent__re2s3cap_la3tent__co2o3lie_re2s3cou_re2s3cri_ma2g3num_re2s3pir_dé3s2i3dco2g3nititran2s1a2tran2s1o2_dé3s2exu_re3s4tab_re3s4tag_dé3s2ert_re3s4tat_re3s4tén_re3s4tér_re3s4tim_re3s4tip_re3s4toc_re3s4toptran2s1u2_no2n1obs_ma2l1a2v_ma2l1int_prou3d2hpro2s3tativa3lent__ta3lent__rétro1a2_pro1s2cé_ma2l1o2dcci3dent__pa3rent__su2r1int_su2r1inf_su2r1i2mtor3rent_cur3rent__mé2s1u2stri3dent__dé3s2orm_su3r2ell_ar3dent__su3r2eaupru3dent__pré2a3lacla2ment__su3r2a3t_pos2t1o2_pos2t1inqua2ment_ter3gent_ser3gent_rai3ment_abî2ment_éci2ment_ar3gent__ar3gent_rin3gent_tan3gent_éli2ment_ani2ment_apo2s3ta_apo2s3tavélo1s2kivol2t1amp_dé3s2orp_dé2s1u2n_péri2s3ssesqui1a2ana3s4trfir2ment_écu2ment_ser3pent_pré3sent_ar3pent__ar3pent_in1s2tab_in1s2tabin2o3cul_in2o3culplu2ment_bou2ment_in2exora_in2exora_su2b3linbru2ment__su3b2é3r_milli1amin2effab_in2effabin2augur_di1a2cid_in2augur_pa2n1optin2a3nit_in2a3nit1informat_ana3s4trvanil3lis_di1a2tom_su3b2altvanil3linstéréo1s2_pa2n1a2fo1s2tratuépi2s3cop_ci2s1alp1s2tructu1é2lément1é2driquepapil3lomllu2ment_",
10 : "1s2tandardimmi3nent__émi3nent_imma3nent_réma3nent_épi3s4cope_in2i3mitiin2i3miti_res3sent_moye2n1â2gréti3cent__dé3s2a3crmon2t3réalinno3cent__mono1ï2dé_pa2n1a2méimpu3dent__pa2n1a2ra_amino1a2camino1a2c_pa2n1o2phinci3dent__ser3ment_appa3rent_déca3dent__dacryo1a2_dé3s2astr_re4s5trin_dé3s2é3gr_péri2s3ta_sar3ment__dé3s2oufr_re3s4tandchro2ment__com3ment__re2s3quil_re2s3pons_gem2ment__re2s3pect_re2s3ciso_dé3s2i3gn_dé3s2i3ligram2ment__dé3s2invo_re2s3cisitran3s2actanti2enneindo3lent__sou3vent_indi3gent_dili3gent_flam2ment_impo3tent_inso3lent_esti2ment_on3guent__on3guent_inti2ment__dé3s2o3défécu3lent_veni2ment_reli2ment_vidi2ment_chlo2r3é2tpu2g3nablechlo2r3a2cryth2ment_o2g3nomonicarê2ment__méta1s2ta_ma2l1aisé_macro1s2célo3quent_tran3s2ats_anti2enne",
11 : "_contre1s2cperti3nent_conti3nent__ma2l1a2dro_in2é3lucta_psycho1a2n_dé3s2o3pilin2é3luctaperma3nent__in2é3narratesta3ment__su2b3liminrésur3gent_in2é3narraimmis4cent__pro2g3nathchien3dent_sporu4lent_dissi3dent_corpu3lent_archi1é2pissubli2ment_indul3gent_confi3dent__syn2g3nathtrucu3lent_détri3ment_nutri3ment_succu3lent_turbu3lent__pa2r1a2che_pa2r1a2chèfichu3ment_entre3gent_conni3vent_mécon3tent_compé3tent__re4s5trict_dé3s2i3nen_re2s3plend1a2nesthésislalo2ment__dé3s2ensib_re4s5trein_phalan3s2tabsti3nent_",
12 : "polyva3lent_équiva4lent_monova3lent_amalga2ment_omnipo3tent__ma2l1a2dreséquipo3tent__dé3s2a3tellproémi3nent_contin3gent_munifi3cent__ma2g3nicideo1s2trictionsurémi3nent_préémi3nent__bai2se3main",
13 : "acquies4cent_intelli3gent_tempéra3ment_transpa3rent__ma2g3nificatantifer3ment_",
14 : "privatdo3cent_diaphrag2ment_privatdo3zent_ventripo3tent__contre3maître",
15 : "grandilo3quent_",
16 : "_cont1re3maît1re",
17 : "_chè2vre3feuil1le"
16 : "_chè2vre3feuille"
}
};
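For readers decoding the tables above: these are Liang-style hyphenation patterns in the layout Hyphenator.js expects. Each numeric key groups patterns of that total character length (digits included), concatenated into one string; within a pattern, an odd digit between two letters permits a hyphen at that gap, an even digit forbids one, and _ anchors the pattern to a word boundary. The leftmin/rightmin fields give the minimum number of letters that must remain before and after any hyphen. The sketch below is a minimal illustration under those assumptions; the helper names (unpackPatterns, hyphenateWord) are hypothetical and not part of the Hyphenator.js API.

// Split each fixed-length block into individual patterns and index them
// by their letters, keeping the digit value for every inter-letter gap.
function unpackPatterns(patterns) {
    var table = {};
    for (var len in patterns) {
        if (!patterns.hasOwnProperty(len)) { continue; }
        var block = patterns[len];
        var n = parseInt(len, 10); // pattern length, digits included
        for (var i = 0; i + n <= block.length; i += n) {
            var pat = block.substr(i, n);
            var letters = '';
            var points = [0]; // value of the gap before the first letter
            for (var j = 0; j < n; j++) {
                var c = pat.charAt(j);
                if (c >= '0' && c <= '9') {
                    points[points.length - 1] = parseInt(c, 10);
                } else {
                    letters += c;
                    points.push(0); // gap after this letter
                }
            }
            table[letters] = points;
        }
    }
    return table;
}

// Classic Liang matching: keep the maximum digit seen for every gap over
// all matching substrings, then break where the result is odd.
function hyphenateWord(word, table, leftmin, rightmin) {
    var w = '_' + word.toLowerCase() + '_';
    var values = [];
    for (var i = 0; i <= w.length; i++) { values.push(0); }
    for (var start = 0; start < w.length; start++) {
        for (var end = start + 1; end <= w.length; end++) {
            var points = table[w.slice(start, end)];
            if (points) {
                for (var k = 0; k < points.length; k++) {
                    if (points[k] > values[start + k]) {
                        values[start + k] = points[k];
                    }
                }
            }
        }
    }
    var out = word.charAt(0);
    for (var p = 1; p < word.length; p++) {
        // values[p + 1] is the gap between word[p - 1] and word[p]
        if (values[p + 1] % 2 === 1 && p >= leftmin &&
                p <= word.length - rightmin) {
            out += '\u00AD'; // soft hyphen
        }
        out += word.charAt(p);
    }
    return out;
}

With the French table above this would be called as hyphenateWord('exemple', unpackPatterns(Hyphenator.languages['fr'].patterns), 2, 3). The real library adds caching, exception lists, and the specialChars handling omitted here; the sketch only shows why replacing a pattern set, as this commit does, changes where soft hyphens land.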

File diff suppressed because one or more lines are too long

Some files were not shown because too many files have changed in this diff.