From 069e77b9b9f0cc44a81d7b209a53a0d6a9c7b116 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 19 Nov 2015 08:13:47 +0530
Subject: [PATCH] Remove no longer working recipes

---
 recipes/deia.recipe             |  70 ----------
 recipes/respekt_magazine.recipe | 148 ---------------------
 recipes/respekt_web.recipe      | 225 --------------------------------
 3 files changed, 443 deletions(-)
 delete mode 100644 recipes/deia.recipe
 delete mode 100644 recipes/respekt_magazine.recipe
 delete mode 100644 recipes/respekt_web.recipe

diff --git a/recipes/deia.recipe b/recipes/deia.recipe
deleted file mode 100644
index ee17d58765..0000000000
--- a/recipes/deia.recipe
+++ /dev/null
@@ -1,70 +0,0 @@
-#!/usr/bin/env python2
-__license__ = 'GPL v3'
-__author__ = 'Gerardo Diez'
-__copyright__ = 'Gerardo Diez'
-description = 'Main daily newspaper from Spain - v1.00 (05, Enero 2011)'
-__docformat__ = 'restructuredtext en'
-
-'''
-deia.com
-'''
-from calibre.web.feeds.recipes import BasicNewsRecipe
-
-class Deia(BasicNewsRecipe):
-    title = 'Deia'
-    __author__ = 'Gerardo Diez'
-    publisher = 'Editorial Iparraguirre, S.A'
-    category = 'news, politics, finances, world, spain, euskadi'
-    publication_type = 'newspaper'
-    oldest_article = 1
-    max_articles_per_feed = 100
-    simultaneous_downloads = 10
-    cover_url = 'http://2.bp.blogspot.com/_RjrWzC6tI14/TM6jrPLaBZI/AAAAAAAAFaI/ayffwxidFEY/s1600/2009-10-13-logo-deia.jpg'
-    timefmt = '[%a, %d %b, %Y]'
-    encoding = 'utf8'
-    language = 'es'
-    remove_javascript = True
-    remove_tags_after = dict(id='Texto')
-    remove_tags_before = dict(id='Texto')
-    remove_tags = [dict(name='div', attrs={'class':['Herramientas ', 'Multimedia']})]
-    no_stylesheets = True
-    extra_css = 'h1 {margin-bottom: .15em;font-size: 2.7em; font-family: Georgia, "Times New Roman", Times, serif;} .Antetitulo {margin: 1em 0;text-transform: uppercase;color: #999;} .PieFoto {margin: .1em 0;padding: .5em .5em .5em .5em;background: #F0F0F0;} .PieFoto p {margin-bottom: 0;font-family: Georgia,"Times New Roman",Times,serif;font-weight: bold; font-style: italic; color: #666;}'
-    keep_only_tags = [dict(name='div', attrs={'class':['Texto ', 'NoticiaFicha ']})]
-    feeds = [
-        (u'Bizkaia', u'http://www.deia.com/index.php/services/rss?seccion=bizkaia'),
-        (u'Bilbao', u'http://www.deia.com/index.php/services/rss?seccion=bilbao'),
-        (u'Hemendik eta Handik', u'http://www.deia.com/index.php/services/rss?seccion=hemendik-eta-handik'),
-        (u'Margen Derecha', u'http://www.deia.com/index.php/services/rss?seccion=margen-derecha'),
-        (u'Encartaciones y Margen Izquierda', u'http://www.deia.com/index.php/services/rss?seccion=margen-izquierda-encartaciones'),
-        (u'Costa', u'http://www.deia.com/index.php/services/rss?seccion=costa'),
-        (u'Duranguesado', u'http://www.deia.com/index.php/services/rss?seccion=duranguesado'),
-        (u'Llodio-Nervión', u'http://www.deia.com/index.php/services/rss?seccion=llodio-nervion'),
-        (u'Arratia-Nervión', u'http://www.deia.com/index.php/services/rss?seccion=arratia-nervion'),
-        (u'Uribe-Txorierri', u'http://www.deia.com/index.php/services/rss?seccion=uribe-txorierri'),
-        (u'Ecos de sociedad', u'http://www.deia.com/index.php/services/rss?seccion=ecos-de-sociedad'),
-        (u'Sucesos', u'http://www.deia.com/index.php/services/rss?seccion=sucesos'),
-        (u'Política', u'http://www.deia.com/index.php/services/rss?seccion=politica'),
-        (u'Euskadi', u'http://www.deia.com/index.php/services/rss?seccion=politica/euskadi'),
-        (u'España', u'http://www.deia.com/index.php/services/rss?seccion=politica/espana'),
-        (u'Sociedad', u'http://www.deia.com/index.php/services/rss?seccion=sociedad'),
-        (u'Euskadi', u'http://www.deia.com/index.php/services/rss?seccion=socidad/euskadi'),
-        (u'Sociedad.España', u'http://www.deia.com/index.php/services/rss?seccion=sociedad/espana'),
-        (u'Ocio y Cultura', u'http://www.deia.com/index.php/services/rss?seccion=ocio-y-cultura'),
-        #(u'Cultura', u'http://www.deia.com/index.php/services/rss?seccion=cultura'),
-        #(u'Ocio', u'http://www.deia.com/index.php/services/rss?seccion=ocio'),
-        (u'On', u'http://www.deia.com/index.php/services/rss?seccion=on'),
-        (u'Agenda', u'http://www.deia.com/index.php/services/rss?seccion=agenda'),
-        (u'Comunicación', u'http://www.deia.com/index.php/services/rss?seccion=comunicacion'),
-        (u'Viajes', u'http://www.deia.com/index.php/services/rss?seccion=viajes'),
-        (u'¡Mundo!', u'http://www.deia.com/index.php/services/rss?seccion=que-mundo'),
-        (u'Humor', u'http://www.deia.com/index.php/services/rss?seccion=humor'),
-        (u'Opinión', u'http://www.deia.com/index.php/services/rss?seccion=opinion'),
-        (u'Editorial', u'http://www.deia.com/index.php/services/rss?seccion=editorial'),
-        (u'Tribuna abierta', u'http://www.deia.com/index.php/services/rss?seccion=tribuna-abierta'),
-        (u'Colaboración', u'http://www.deia.com/index.php/services/rss?seccion=colaboracion'),
-        (u'Columnistas', u'http://www.deia.com/index.php/services/rss?seccion=columnistas'),
-        (u'Deportes', u'http://www.deia.com/index.php/services/rss?seccion=deportes'),
-        (u'Athletic', u'http://www.deia.com/index.php/services/rss?seccion=athletic'),
-        (u'Economía', 'http://www.deia.com/index.php/services/rss?seccion=economia'),
-        (u'Mundo', u'http://www.deia.com/index.php/services/rss?seccion=mundo')]
-
diff --git a/recipes/respekt_magazine.recipe b/recipes/respekt_magazine.recipe
deleted file mode 100644
index 2d5998d5e0..0000000000
--- a/recipes/respekt_magazine.recipe
+++ /dev/null
@@ -1,148 +0,0 @@
-#!/usr/bin/python2
-# -*- coding: utf-8 -*-
-# License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
-# Copyright: tomashnyk@gmail.com
-
-__license__ = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
-__copyright__ = 'tomashnyk@gmail.com'
-
-import re
-from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
-# This imports the version bundled with Calibre
-import lxml
-from lxml.builder import E
-
-class respektRecipe(BasicNewsRecipe):
-    __author__ = u'Tomáš Hnyk'
-    title = u'Respekt - Magazine'
-    publisher = u'Respekt Publishing a. s.'
-    description = u'Articles from the printed edition, password needed for full access'
-    encoding = 'cp1250'
-    language = 'cs'
-    remove_javascript = True
-    extra_css = 'p {text-align:justify} \
-        ul {color:black} \
-        .image_caption {font-size:50%;font-style:italic;} \
-        .author {text-align:left;} \
-        p.indent_first_line {text-indent:30px;}'
-    remove_tags_before = dict(name='div', attrs={'class':['l']})
-    remove_tags_after = dict(id='text')
-    remove_tags = [dict(name='ul', attrs={'class':['tabs-d'], 'id':['comm']}), \
-        dict(name='div', attrs={'class':['slot', 'reklama', 'date']}), \
-        dict(name='span', attrs={'class':['detail-vykrik']}), \
-        dict(name='p', attrs={'class':['detail-vykrik']}), \
-        dict(name='div', attrs={'id':['col123d-video', 'col123d-infographic', 'col123d-gallery', 'col12d-discussion']}),  # soup>lxml>soup in preprocess requires this
-        dict(name='strong', attrs={'class':['detail-vykrik']}),
-        dict(name='script')]
-    # this makes authors left-aligned by not using the author class
-    preprocess_regexps = [(re.compile(r'', re.DOTALL|re.IGNORECASE), lambda match: '')]
-    # remove empty tags
-    preprocess_regexps.append((re.compile(r' ', re.DOTALL|re.IGNORECASE), lambda match: ' '))
-    preprocess_regexps.append((re.compile(r' ', re.DOTALL|re.IGNORECASE), lambda match: ' '))
-    preprocess_regexps.append((re.compile(r' ', re.DOTALL|re.IGNORECASE), lambda match: ''))
-    preprocess_regexps.append((re.compile(r'font-size: 12px', re.DOTALL|re.IGNORECASE), lambda match: ''))
-    preprocess_regexps.append((re.compile(r'color: #[0-9]*', re.DOTALL|re.IGNORECASE), lambda match: ''))
-
-    def get_cover_url(self):
-        soup = self.index_to_soup('http://respekt.ihned.cz/')
-        cover = soup.findAll('div', attrs={'class':'cover'})[0].find('img')['src']
-        return cover
-
-    needs_subscription = True
-
-    def get_browser(self):
-        br = BasicNewsRecipe.get_browser(self)
-        if self.username is not None and self.password is not None:
-            br.open('http://muj-ucet.ihned.cz/')
-            br.select_form(name='login')
-            br['login[nick]'] = self.username
-            br['login[pass]'] = self.password
-            br.submit()
-        return br
-
-    def parse_index(self):
-        raw = self.index_to_soup('http://respekt.ihned.cz/aktualni-cislo/', raw=True)
-        root = lxml.html.fromstring(raw)
-        ans = []
-        for article in root.xpath("//div[@class='ow-enclose']/div[@class='ow']"):
-            section_title = article.xpath(".//span[text()='(rubrika: ']")[0].find("a").text
-            date = article.xpath("span[@class='date-author']")[0].text[:-3]
-            title = article.find("h2").find("a").text
-            url = article.find('h2').find('a').get('href')
-            link = {'title':title, 'url':url, 'date':date}
-            for section in ans:
-                if section[0] == section_title:
-                    section[1].append(link)
-                    break
-            else:
-                ans.append((section_title, [link]))
-        return ans
-
-    def cleanup(self):
-        self.browser.open('http://muj-ucet.ihned.cz/?login[logout]=1')
-
-    def preprocess_html(self, soup):
-        raw = u''.join(unicode(a) for a in soup.contents)
-        root = lxml.html.fromstring(raw)
-
-        # Make image captions visible
-        body = root.xpath("//div[@id='text']")[0]
-        add = 0
-        for index, element in enumerate(body):
-            try:
-                if element.tag == 'img':
-                    body.insert(index+add+1, E.p(element.get('title'), {"class":"image_caption"}))
-                    add += 1
-            except:
-                pass
-
-        # Add length of the articles in words after author
-        article_length = str(len(body.text_content().split(' '))) + ' slov'
-        root.xpath("//div[@class='author-image']/div[@class='']/ul")[0].append(E.li(article_length))
-
-        # Make perex (subheading) start on a new line
-        root.xpath("//h1")[0].append(E.br(''))
-
-        # Indent paragraphs when typographically suitable
-        parse = True
-        # There are only single paragraphs in these sections
-        if root.xpath("//title")[0].text == u"Deset českých zpráv, které by vás neměly minout | Deset českých zpráv - RESPEKT.IHNED.CZ":
-            parse = False
-        if root.xpath("//title")[0].text == u"Deset zahraničních zpráv, které by vás neměly minout | Deset světových zpráv - RESPEKT.IHNED.CZ":
-            parse = False
-        if parse:
-            # First paragraph is never indented
-            paragraphs = root.xpath('//p')
-            # Clear the formatting a little bit by removing these attributes
-            for par in paragraphs:
-                if 'class' in par.keys():
-                    if par.attrib['class'] == 'detail-odstavec':
-                        par.attrib.pop('class')
-            paragraphs.reverse()
-            for par in paragraphs[:-1]:
-                try:
-                    # <strong> in the beginning of this paragraph means no indenting as well as ellipses as the only text in paragraph
-                    if len(par) > 0:
-                        if (par.text is None and par.getchildren()[0].tag == 'strong'):
-                            continue
-                        elif par.getprevious().text == u'\u2026':
-                            continue
-                    indent = False
-                    # Either indent if the paragraphs are the same
-                    if par.getprevious().attrib == par.attrib:
-                        indent = True
-                    # Or else if the first paragraph of the text was special
-                    if 'class' in par.getprevious().keys():
-                        par_name = par.getprevious().attrib['class']
-                        if par_name == '01prvniodstavecrepublicblok' or par_name == 'Zkladnodstavec' or par_name == '01titulekhlavn':
-                            indent = True
-                    if indent:
-                        for key in par.keys():
-                            par.attrib.pop(key)
-                        par.attrib['class'] = "indent_first_line"
-                except:
-                    pass
-
-        return(BeautifulSoup(lxml.etree.tostring(root, encoding=unicode)))
diff --git a/recipes/respekt_web.recipe b/recipes/respekt_web.recipe
deleted file mode 100644
index b65d57459d..0000000000
--- a/recipes/respekt_web.recipe
+++ /dev/null
@@ -1,225 +0,0 @@
-#!/usr/bin/python2
-# -*- coding: utf-8 -*-
-# License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
-# Copyright: tomashnyk@gmail.com
-
-__license__ = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
-__copyright__ = 'tomashnyk@gmail.com'
-
-import re, os, datetime
-from calibre.web.feeds.recipes import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import BeautifulSoup
-from calibre.constants import config_dir
-# This imports the version bundled with Calibre
-import lxml
-from lxml.builder import E
-
-class respektWebRecipe(BasicNewsRecipe):
-    __author__ = u'Tomáš Hnyk'
-    title = u'Respekt - Web'
-    publisher = u'Respekt Publishing a. s.'
-    description = u'Free articles from respekt.cz website'
-    encoding = 'cp1250'
-    language = 'cs'
-    remove_javascript = True
-    cover_url = 'http://respekt.ihned.cz/img/R/respekt_logo.png'
-    extra_css = 'p {text-align:justify} \
-        ul {color:black} \
-        .image_caption {font-size:50%;font-style:italic;} \
-        .author {text-align:left;} \
-        p.indent_first_line {text-indent:30px;}'
-    remove_tags_before = dict(name='div', attrs={'class':['l']})
-    remove_tags_after = dict(id='text')
-    remove_tags = [dict(name='ul', attrs={'class':['tabs-d'], 'id':['comm']}),
-        dict(name='div', attrs={'class':['slot', 'reklama', 'date']}),
-        dict(name='span', attrs={'class':['detail-vykrik']}),
-        dict(name='p', attrs={'class':['detail-vykrik']}),
-        dict(name='div', attrs={'id':['col123d-video', 'col123d-infographic', 'col123d-gallery', 'col12d-discussion']}),  # soup>lxml>soup in preprocess requires this
-        dict(name='strong', attrs={'class':['detail-vykrik']}),
-        dict(name='script')]
-    # this makes authors left-aligned by not using the author class
-    preprocess_regexps = [(re.compile(r'', re.DOTALL|re.IGNORECASE), lambda match: '')]
-    # remove empty tags
-    preprocess_regexps.append((re.compile(r' ', re.DOTALL|re.IGNORECASE), lambda match: ' '))
-    preprocess_regexps.append((re.compile(r' ', re.DOTALL|re.IGNORECASE), lambda match: ' '))
-    preprocess_regexps.append((re.compile(r' ', re.DOTALL|re.IGNORECASE), lambda match: ''))
-    preprocess_regexps.append((re.compile(r'font-size: 12px', re.DOTALL|re.IGNORECASE), lambda match: ''))
-    preprocess_regexps.append((re.compile(r'color: #[0-9]*', re.DOTALL|re.IGNORECASE), lambda match: ''))
-
-    def parse_index(self):
-        # Read already downloaded articles
-        recipe_dir = os.path.join(config_dir, 'recipes')
-        old_articles = os.path.join(recipe_dir, self.title)
-        past_items = []
-        if os.path.exists(old_articles):
-            with file(old_articles) as f:
-                for h in f:
-                    l = h.strip().split(" ")
-                    past_items.append((l[0], " ".join(l[1:])))
-        old_urls = [x[0] for x in past_items]
-        count_items = {}
-        current_items = []
-        # Keep a list of only 20 latest articles for each section
-        past_items.reverse()
-        for item in past_items:
-            if item[1] in count_items.keys():
-                if count_items[item[1]] < 20:
-                    count_items[item[1]] += 1
-                    current_items.append(item)
-            else:
-                count_items[item[1]] = 1
-                current_items.append(item)
-        current_items.reverse()
-
-        sections = []
-        # Get the webpages to download lists of articles from
-        raw = self.index_to_soup('http://respekt.ihned.cz/sloupky-redaktoru/', raw=True)
-        root = lxml.html.fromstring(raw)
-        sections = []
-        for section in root.xpath("//div[@class='ow-enclose sr']/table/tr/td"):
-            try:
-                url = section.find('a').get('href')
-                if not ('?m=authors&person[id]=' in url):
-                    sections.append((url, section.find('a').find('b').text))
-            except:
-                pass
-        sections.append(('http://respekt.ihned.cz/respekt-dj/', 'Respekt DJ'))
-        sections.append(('http://respekt.ihned.cz/fokus/', 'Fokus'))
-        sections.append(('http://respekt.ihned.cz/respekt-hub/', 'Respekt Hub'))
-        sections.append(('http://respekt.ihned.cz/rozhovory/', 'Rozhovory'))
-        sections.append(('http://respekt.ihned.cz/glosy/', 'Glosy'))
-
-        # Get the list of articles
-        ans = []
-        for section in sections:
-            raw = self.index_to_soup(section[0], raw=True)
-            root = lxml.html.fromstring(raw)
-            list_of_articles = []
-            articles = root.xpath("//div[@class='ow-enclose']/div[@class='ow']")
-            # Sort the articles in a section from oldest to newest
-            articles.reverse()
-            for article in articles:
-                date = getattr(article.xpath("span[@class='date-author']")[0], 'text', '')[:-3]
-                author = getattr(article.xpath("span[@class='date-author']")[0].find("a"), 'text', '')
-                title = getattr(article.find("h2").find("a"), 'text', '')
-                url = article.find('h2').find('a').get('href')
-                # Only download new articles
-                if url not in old_urls:
-                    old_urls.append(url)
-                    current_items.append((url, section[1]))
-                    list_of_articles.append({'title':title, 'url':url, 'date':date, 'author':author})
-            # Redownload this page next time if it is still being updated (between 7 and 17 GMT generally, so make the limits a little bit bigger)
-            if section[1] == 'Respekt DJ':
-                if list_of_articles:
-                    if datetime.datetime.today().weekday() in range(0,5) and 6 < datetime.datetime.utcnow().hour < 17:
-                        # list_of_articles = list_of_articles[:-1]
-                        current_items = current_items[:-1]
-            if list_of_articles:
-                ans.append((section[1], list_of_articles))
-        # Write already downloaded articles
-        with file(old_articles, 'w') as f:
-            f.write('\n'.join('{} {}'.format(*x) for x in current_items))
-        return ans
-
-    # For some reason, the following does not work:
-    # preprocess_regexps.append((re.compile(r'', re.DOTALL|re.IGNORECASE), lambda match: ''))
-    def preprocess_raw_html(self, raw_html, url):
-        return re.sub("", "", raw_html)
-
-    def preprocess_html(self, soup):
-        raw = u''.join(unicode(a) for a in soup.contents)
-        root = lxml.html.fromstring(raw)
-        # Make image captions visible
-        body = root.xpath("//div[@id='text']")[0]
-        add = 0
-        for index, element in enumerate(body):
-            try:
-                if element.tag == 'img':
-                    body.insert(index+add+1, E.p(element.get('title'), {"class":"image_caption"}))
-                    add += 1
-            except:
-                pass
-        # Make captions visible on the website have the same style
-        try:
-            root.xpath("//div[@class='hlavni-obrazek-popis']")[0].attrib['class'] = 'image_caption'
-        except:
-            pass
-        # For DJ, the perex is always the same, so remove it
-        if root.xpath("//title")[0].text.split("|")[-1] == u' Respekt DJ - RESPEKT.CZ':
-
-            perex = root.xpath("//div[@id='perex']")[0]
-            clean = root.xpath("//div[@class='clean']")[0]
-            perex.getparent().remove(perex)
-            clean.getparent().remove(clean)
-
-            # DJ section gets mal-formatted on kindle otherwise
-            for i in root.xpath("//h2[@class='d-dj-t']"):
-                i.attrib['class'] = ''
-                E.style = "font-size:60%;font-weight:normal;"
-                time = E('span', i.getprevious().text_content(), style=E.style)
-                # Time should be ahead of the title
-                time.tail = ' ' + i.text
-                i.text = ''
-                i.insert(0, time)
-            for i in root.xpath("//div[@class='d-dj-d']"):
-                i.attrib['class'] = ''
-                i.xpath("div/span")[0].text = ''
-            for i in root.xpath("//div[@class='d-dj-b']"):
-                i.attrib['class'] = ''
-
-            # Make captions visible on the website have the same style
-            root.xpath("//div[@class='hlavni-obrazekDJ-popis']")[0].attrib['class'] = 'image_caption'
-
-            # Reverse the entries so that the earliest are at the top
-            entries = root.xpath("//div[@class='d-dj-i']")
-            entries.reverse()
-            dj_body = entries[0].getparent()
-            for entry in entries:
-                dj_body.remove(entry)
-                dj_body.append(entry)
-
-        # We are not interested in this paragraph as it stays the same and is essentially an ad
-        if root.xpath("//title")[0].text.split("|")[-1] == u' Audit Jana Macháčka - Respekt.iHNed.cz':
-            ad = root.xpath("//p[@id='ajmonf']")[0]
-            ad.getparent().remove(ad)
-
-        # Add length of the articles in words after author
-        article_length = str(len(body.text_content().split(' '))) + ' slov'
-        root.xpath("//div[@class='author-image']/div[@class='']/ul")[0].append(E.li(article_length))
-
-        # Make perex (subheading) start on a new line
-        root.xpath("//h1")[0].append(E.br(''))
-
-        # Indent paragraphs when typographically suitable
-        # First paragraph is never indented
-        paragraphs = root.xpath('//p')
-        # Clear the formatting a little bit by removing these attributes
-        for par in paragraphs:
-            if 'class' in par.keys():
-                if par.attrib['class'] == 'detail-odstavec':
-                    par.attrib.pop('class')
-        paragraphs.reverse()
-        for par in paragraphs[:-1]:
-            try:
-                # <strong> in the beginning of this paragraph means no indenting as well as ellipses as the only text in paragraph
-                if len(par) > 0:
-                    if (par.text is None and par.getchildren()[0].tag == 'strong'):
-                        continue
-                    elif par.getprevious().text == u'\u2026':
-                        continue
-                indent = False
-                # Either indent if the paragraphs are the same
-                if par.getprevious().attrib == par.attrib:
-                    indent = True
-                # Or else if the first paragraph of the text was special
-                if 'class' in par.getprevious().keys():
-                    par_name = par.getprevious().attrib['class']
-                    if par_name == '01prvniodstavecrepublicblok' or par_name == 'Zkladnodstavec' or par_name == '01titulekhlavn':
-                        indent = True
-                if indent:
-                    for key in par.keys():
-                        par.attrib.pop(key)
-                    par.attrib['class'] = "indent_first_line"
-            except:
-                pass
-
-        return(BeautifulSoup(lxml.etree.tostring(root, encoding=unicode)))
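
Note: all three files removed by this patch are calibre news-download recipes built on the
same BasicNewsRecipe API. For reference, a minimal sketch of a recipe of the same shape is
shown below; the class name, feed title, and RSS URL are placeholders, not a real
publication:

    from calibre.web.feeds.recipes import BasicNewsRecipe

    class ExampleRecipe(BasicNewsRecipe):
        # Metadata shown in calibre's "Fetch news" scheduler
        title = u'Example News'
        __author__ = u'Example Author'
        language = 'en'
        # Download limits: how far back to look, and how many items per feed
        oldest_article = 7
        max_articles_per_feed = 100
        # Cleanup options the removed recipes also relied on
        no_stylesheets = True
        remove_javascript = True
        # (section name, RSS URL) pairs; calibre fetches each feed and
        # assembles the articles into an e-book
        feeds = [
            (u'Front page', u'http://example.com/rss.xml'),
        ]

Recipes that need more control than a plain feed list override hooks such as parse_index(),
preprocess_html(), or get_browser(), which is exactly what the removed Respekt recipes did
for custom section scraping, login handling, and HTML cleanup.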