Merge from trunk

commit b2dceb1a7a
@@ -1,6 +1,3 @@
'''
dnaindia.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe

@@ -12,6 +9,10 @@ class DNAIndia(BasicNewsRecipe):
    language = 'en_IN'

    encoding = 'cp1252'
    use_embedded_content = False

    no_stylesheets = True
    auto_cleanup = True

    feeds = [
        ('Top News', 'http://www.dnaindia.com/syndication/rss_topnews.xml'),
@@ -22,15 +23,10 @@ class DNAIndia(BasicNewsRecipe):
        ('World', 'http://www.dnaindia.com/syndication/rss,catid-9.xml'),
        ('Money', 'http://www.dnaindia.com/syndication/rss,catid-4.xml'),
        ('Sports', 'http://www.dnaindia.com/syndication/rss,catid-6.xml'),
        ('After Hours', 'http://www.dnaindia.com/syndication/rss,catid-7.xml'),
        ('Digital Life', 'http://www.dnaindia.com/syndication/rss,catid-1089741.xml'),
        ('After Hours', 'http://www.dnaindia.com/syndication/rss,catid-7.xml')
    ]
    remove_tags = [{'id':['footer', 'lhs-col']}, {'class':['bottom', 'categoryHead',
        'article_tools']}]
    keep_only_tags = dict(id='middle-col')
    remove_tags_after=[dict(attrs={'id':'story'})]
    remove_attributes=['style']
    no_stylesheets = True

    def print_version(self, url):
        match = re.search(r'newsid=(\d+)', url)
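The print_version hook above is cut off by the hunk boundary; the visible part only pulls the numeric newsid out of the article URL. A minimal standalone sketch of just that step, with a made-up URL (the print URL the method goes on to build is not shown in the diff):

import re

url = 'http://www.dnaindia.com/india/report_example?newsid=1234567'   # hypothetical
match = re.search(r'newsid=(\d+)', url)
if match is not None:
    print(match.group(1))   # -> 1234567
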
@@ -9,11 +9,16 @@ class TimesOfIndia(BasicNewsRecipe):
    max_articles_per_feed = 25

    no_stylesheets = True
    keep_only_tags = [{'class':['maintable12', 'prttabl']}]
    remove_attributes = ['style']
    keep_only_tags = [
        {'class':re.compile(r'maintable12|prttabl')},
        {'id':['mod-article-header',
            'mod-a-body-after-first-para', 'mod-a-body-first-para']},
    ]
    remove_tags = [
        dict(style=lambda x: x and 'float' in x),
        {'class':['prvnxtbg', 'footbdrin', 'bcclftr']},
    ]
        {'class':re.compile('tabsintbgshow|prvnxtbg')},
        {'id':['fbrecommend', 'relmaindiv']}
    ]

    feeds = [
        ('Top Stories',

@@ -41,6 +46,8 @@ class TimesOfIndia(BasicNewsRecipe):
    ]

    def get_article_url(self, article):
        # Times of India sometimes serves an ad page instead of the article;
        # this code detects and circumvents that.
        url = BasicNewsRecipe.get_article_url(self, article)
        if '/0Ltimesofindia' in url:
            url = url.partition('/0L')[-1]
@@ -61,6 +68,3 @@ class TimesOfIndia(BasicNewsRecipe):

        return url


    def preprocess_html(self, soup):
        return soup

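A standalone illustration of the ad-page workaround in get_article_url above. The sample URL is made up; only the '/0L' rewrite is taken from the diff, and the method keeps working on the value before the final return url in the later hunk:

url = 'http://ads.example.com/redirect/0Ltimesofindia.indiatimes.com/articleshow/123.cms'   # hypothetical
if '/0Ltimesofindia' in url:
    url = url.partition('/0L')[-1]
print(url)   # -> timesofindia.indiatimes.com/articleshow/123.cms
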
@@ -73,6 +73,20 @@ class Worker(Thread): # Get details {{{
                8: ['août'],
                9: ['sept'],
                12: ['déc'],
            },
            'es': {
                1: ['enero'],
                2: ['febrero'],
                3: ['marzo'],
                4: ['abril'],
                5: ['mayo'],
                6: ['junio'],
                7: ['julio'],
                8: ['agosto'],
                9: ['septiembre', 'setiembre'],
                10: ['octubre'],
                11: ['noviembre'],
                12: ['diciembre'],
            },
            'jp': {
                1: [u'1月'],
@@ -101,13 +115,16 @@ class Worker(Thread): # Get details {{{
                text()="Dettagli prodotto" or \
                text()="Product details" or \
                text()="Détails sur le produit" or \
                text()="Detalles del producto" or \
                text()="登録情報"]/../div[@class="content"]
            '''
        # "Editor:" is the label used on the Spanish site
        self.publisher_xpath = '''
            descendant::*[starts-with(text(), "Publisher:") or \
                starts-with(text(), "Verlag:") or \
                starts-with(text(), "Editore:") or \
                starts-with(text(), "Editeur") or \
                starts-with(text(), "Editor:") or \
                starts-with(text(), "出版社:")]
            '''
        self.language_xpath = '''
@@ -116,12 +133,14 @@ class Worker(Thread): # Get details {{{
                or text() = "Language" \
                or text() = "Sprache:" \
                or text() = "Lingua:" \
                or text() = "Idioma:" \
                or starts-with(text(), "Langue") \
                or starts-with(text(), "言語") \
                ]
            '''
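# Standalone sketch, not part of the diff: how label xpaths like the ones above
# locate the Spanish fields in a made-up product-details fragment (calibre uses lxml).
from lxml import html
root = html.fromstring('<div><b>Editor:</b> Planeta <b>Idioma:</b> Espanol</div>')
pub = root.xpath('descendant::*[starts-with(text(), "Editor:")]')[0]
print(pub.tail.strip())   # -> Planeta, the text that follows the matched label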

        self.ratings_pat = re.compile(
            r'([0-9.]+) ?(out of|von|su|étoiles sur|つ星のうち) ([\d\.]+)( (stars|Sternen|stelle)){0,1}')
            r'([0-9.]+) ?(out of|von|su|étoiles sur|つ星のうち|de un máximo de) ([\d\.]+)( (stars|Sternen|stelle|estrellas)){0,1}')
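# Standalone sketch, not part of the diff: the widened ratings_pat above now also
# matches the Spanish rating text; the sample string is made up to the same shape.
import re
ratings_pat = re.compile(
    r'([0-9.]+) ?(out of|von|su|étoiles sur|つ星のうち|de un máximo de) ([\d\.]+)( (stars|Sternen|stelle|estrellas)){0,1}')
m = ratings_pat.search(u'4.1 de un máximo de 5 estrellas')
print(m.group(1) + ' / ' + m.group(3))   # -> 4.1 / 5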

        lm = {
            'eng': ('English', 'Englisch'),

@@ -143,6 +162,7 @@ class Worker(Thread): # Get details {{{
        for i, vals in self.months.iteritems():
            for x in vals:
                ans = ans.replace(x, self.english_months[i])
        ans = ans.replace(' de ', ' ')
        return ans

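# Standalone sketch, not part of the diff: what the month substitution and the new
# ' de ' removal above do to a Spanish-style date string (sample made up).
months = {2: ['febrero']}
english_months = {2: 'February'}
ans = '28 de febrero de 2012'
for i, vals in months.items():
    for x in vals:
        ans = ans.replace(x, english_months[i])
ans = ans.replace(' de ', ' ')
print(ans)   # -> 28 February 2012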
    def run(self):
@@ -422,6 +442,7 @@ class Amazon(Source):
        'uk' : _('UK'),
        'it' : _('Italy'),
        'jp' : _('Japan'),
        'es' : _('Spain'),
        }

    options = (
@@ -789,6 +810,16 @@ if __name__ == '__main__': # tests {{{
    ),
    ] # }}}

    es_tests = [ # {{{
        (
            {'identifiers':{'isbn': '8483460831'}},
            [title_test('Tiempos Interesantes',
                exact=True), authors_test(['Terry Pratchett'])
            ]

        ),
    ] # }}}

    jp_tests = [ # {{{
        ( # isbn -> title, authors
            {'identifiers':{'isbn': '9784101302720' }},

@@ -804,6 +835,6 @@ if __name__ == '__main__': # tests {{{
    ] # }}}

    test_identify_plugin(Amazon.name, com_tests)
    #test_identify_plugin(Amazon.name, jp_tests)
    #test_identify_plugin(Amazon.name, es_tests)
# }}}

@@ -173,6 +173,10 @@ class PDFWriter(QObject): # {{{
                printer.setOutputFormat(QPrinter.NativeFormat)
                self.view.print_(printer)
                printer.abort()
            else:
                # The document is so corrupt that we can't render the page.
                self.loop.exit(0)
                raise Exception('Document cannot be rendered.')
        self._render_book()

    def _delete_tmpdir(self):
@@ -207,11 +211,14 @@ class PDFWriter(QObject): # {{{
        try:
            outPDF = PdfFileWriter(title=self.metadata.title, author=self.metadata.author)
            for item in self.combine_queue:
                with open(item, 'rb') as item_stream:
                    inputPDF = PdfFileReader(item_stream)
                    for page in inputPDF.pages:
                        outPDF.addPage(page)
            outPDF.write(self.out_stream)
                # The input PDF stream must remain open until the final PDF
                # is written to disk. PyPDF references pages added to the
                # final PDF from the input PDF on disk. It does not store
                # the pages in memory so we can't close the input PDF.
                inputPDF = PdfFileReader(open(item, 'rb'))
                for page in inputPDF.pages:
                    outPDF.addPage(page)
            outPDF.write(self.out_stream)
        finally:
            self._delete_tmpdir()
            self.loop.exit(0)

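The comment in the last hunk states the constraint this change works around: pyPdf only reads page data out of the input files when the combined PDF is written, so every input stream has to stay open until after write(). A minimal standalone sketch of that pattern, with made-up file names, assuming the pyPdf variant bundled with calibre (which exposes the .pages iterator used in the diff):

from pyPdf import PdfFileReader, PdfFileWriter

outPDF = PdfFileWriter()
streams = []
for name in ('chapter1.pdf', 'chapter2.pdf'):   # hypothetical inputs
    f = open(name, 'rb')
    streams.append(f)                           # keep the handle open for now
    for page in PdfFileReader(f).pages:
        outPDF.addPage(page)
out_stream = open('combined.pdf', 'wb')
outPDF.write(out_stream)                        # page data is pulled from the inputs here
out_stream.close()
for f in streams:                               # only now is it safe to close the inputs
    f.close()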