mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge branch 'master' of https://github.com/unkn0w7n/calibre
This commit is contained in:
commit
7792a0c3be
@@ -34,16 +34,25 @@ def parse_img_grid(g):
|
|||||||
yield '</div>'
|
yield '</div>'
|
||||||
|
|
||||||
def parse_cnt(cnt):
|
def parse_cnt(cnt):
|
||||||
|
txt = ''
|
||||||
if cnt['__typename'] == 'TextInline':
|
if cnt['__typename'] == 'TextInline':
|
||||||
if cnt.get('formats'):
|
if cnt.get('formats'):
|
||||||
for fmt in cnt.get('formats', {}):
|
for fmt in cnt.get('formats', {}):
|
||||||
|
if fmt['__typename'] == 'ItalicFormat':
|
||||||
|
txt += '<i>'
|
||||||
if fmt['__typename'] == 'LinkFormat':
|
if fmt['__typename'] == 'LinkFormat':
|
||||||
hrf = fmt['url']
|
txt += '<a href="{}">'.format(fmt['url'])
|
||||||
yield '<a href="{}">'.format(hrf) + cnt['text'] + '</a>'
|
txt += cnt['text']
|
||||||
else:
|
elif cnt['__typename'] == 'LineBreakInline':
|
||||||
yield cnt['text']
|
txt += '<br/>'
|
||||||
else:
|
if '<i>' in txt and '<a href' in txt:
|
||||||
yield cnt['text']
|
yield txt + '</a></i>'
|
||||||
|
elif '<i>' in txt:
|
||||||
|
yield txt + '</i>'
|
||||||
|
elif '<a href' in txt:
|
||||||
|
yield txt + '</a>'
|
||||||
|
else:
|
||||||
|
yield txt
|
||||||
|
|
||||||
def parse_byline(byl):
|
def parse_byline(byl):
|
||||||
for b in byl.get('bylines', {}):
|
for b in byl.get('bylines', {}):
|
||||||
@@ -69,8 +78,8 @@ def header_parse(h):
|
|||||||
if h['ledeMedia'].get('__typename', '') == 'ImageBlock':
|
if h['ledeMedia'].get('__typename', '') == 'ImageBlock':
|
||||||
yield ''.join(parse_image(h['ledeMedia']['media']))
|
yield ''.join(parse_image(h['ledeMedia']['media']))
|
||||||
if h.get('byline'):
|
if h.get('byline'):
|
||||||
yield '<br><div class="byl">'
|
yield '<div class="byl"><br/>'
|
||||||
yield '\t'.join(parse_byline(h['byline']))
|
yield '\t' + '\t'.join(parse_byline(h['byline']))
|
||||||
if h.get('timestampBlock'):
|
if h.get('timestampBlock'):
|
||||||
yield '\t<div>' + iso_date(h['timestampBlock']['timestamp']) + '</div>'
|
yield '\t<div>' + iso_date(h['timestampBlock']['timestamp']) + '</div>'
|
||||||
yield '</div>'
|
yield '</div>'
|
||||||
@@ -81,33 +90,35 @@ def article_parse(data):
|
|||||||
if x.get('__typename', '') in {'HeaderBasicBlock', 'HeaderFullBleedVerticalBlock', 'HeaderFullBleedHorizontalBlock'}:
|
if x.get('__typename', '') in {'HeaderBasicBlock', 'HeaderFullBleedVerticalBlock', 'HeaderFullBleedHorizontalBlock'}:
|
||||||
yield '\n'.join(header_parse(x))
|
yield '\n'.join(header_parse(x))
|
||||||
elif x.get('__typename', '') == 'ParagraphBlock':
|
elif x.get('__typename', '') == 'ParagraphBlock':
|
||||||
yield '<p>'
|
p_txt = ''
|
||||||
for para in x['content']:
|
for para in x['content']:
|
||||||
yield '\t'.join(parse_cnt(para))
|
p_txt += ''.join(parse_cnt(para))
|
||||||
yield '</p>'
|
if p_txt.strip():
|
||||||
|
yield '<p>' + p_txt + '</p>'
|
||||||
elif x.get('__typename', '') in {'Heading2Block', 'Heading3Block'}:
|
elif x.get('__typename', '') in {'Heading2Block', 'Heading3Block'}:
|
||||||
yield '<h4>'
|
h4_txt = ''
|
||||||
for h2 in x['content']:
|
for h2 in x['content']:
|
||||||
yield '\t'.join(parse_cnt(h2))
|
h4_txt += ''.join(parse_cnt(h2))
|
||||||
yield '</h4>'
|
if h4_txt.strip():
|
||||||
|
yield '<h4>' + h4_txt + '</h4>'
|
||||||
elif x.get('__typename', '') == 'Heading1Block':
|
elif x.get('__typename', '') == 'Heading1Block':
|
||||||
yield '<h1>'
|
h1_txt = ''
|
||||||
for h1 in x['content']:
|
for h1 in x['content']:
|
||||||
yield '\t'.join(parse_cnt(h1))
|
h1_txt += ''.join(parse_cnt(h1))
|
||||||
yield '</h1>'
|
if h1_txt.strip():
|
||||||
|
yield '<h1>' + h1_txt + '</h1>'
|
||||||
elif x.get('__typename', '') == 'BylineBlock':
|
elif x.get('__typename', '') == 'BylineBlock':
|
||||||
yield '<br><div class="byl">'
|
yield '<div class="byl">\n<br/>\t' + '\t'.join(parse_byline(x)) + '</div>'
|
||||||
yield '\t'.join(parse_byline(x))
|
|
||||||
yield '</div>'
|
|
||||||
elif x.get('__typename', '') == 'ImageBlock':
|
elif x.get('__typename', '') == 'ImageBlock':
|
||||||
yield ''.join(parse_image(x['media']))
|
yield ''.join(parse_image(x['media']))
|
||||||
elif x.get('__typename', '') == 'GridBlock':
|
elif x.get('__typename', '') == 'GridBlock':
|
||||||
yield ''.join(parse_img_grid(x))
|
yield ''.join(parse_img_grid(x))
|
||||||
elif x.get('content'):
|
elif x.get('content'):
|
||||||
yield '<p><i>'
|
o_txt = ''
|
||||||
for i in x['content']:
|
for i in x['content']:
|
||||||
yield '\t'.join(parse_cnt(i))
|
o_txt += ''.join(parse_cnt(i))
|
||||||
yield '</i></p>'
|
if o_txt.strip():
|
||||||
|
yield '<p><i>' + o_txt + '</i></p>'
|
||||||
yield "</body></html>"
|
yield "</body></html>"
|
||||||
|
|
||||||
|
|
||||||
@@ -207,5 +218,5 @@ class nytFeeds(BasicNewsRecipe):
|
|||||||
if w and isinstance(w, str):
|
if w and isinstance(w, str):
|
||||||
res = '-' + w + '.jpg'
|
res = '-' + w + '.jpg'
|
||||||
for img in soup.findAll('img', attrs={'src':True}):
|
for img in soup.findAll('img', attrs={'src':True}):
|
||||||
img['src'] = img['src'].rsplit('-', 1)[0] + res
|
img['src'] = img['src'].rsplit('-article', 1)[0] + res
|
||||||
return soup
|
return soup
|
||||||
|
Loading…
x
Reference in New Issue
Block a user