diff --git a/recipes/andhrajyothy_ap.recipe b/recipes/andhrajyothy_ap.recipe
new file mode 100644
index 0000000000..3e7834098c
--- /dev/null
+++ b/recipes/andhrajyothy_ap.recipe
@@ -0,0 +1,122 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+import json
+from datetime import date
+from collections import defaultdict
+
+# figure out your local edition id from the log of this recipe
+edi_id = 182  # NTR VIJAYAWADA - 182
+
+today = date.today().strftime('%d/%m/%Y')
+
+# for older edition
+# today = '15/01/2024'
+
+day, month, year = (int(x) for x in today.split('/'))
+dt = date(year, month, day)
+today = today.replace('/', '%2F')
+
+index = 'https://epaper.andhrajyothy.com'
+
+class andhra(BasicNewsRecipe):
+    title = 'ఆంధ్రజ్యోతి - ఆంధ్రప్రదేశ్'
+    language = 'te'
+    __author__ = 'unkn0wn'
+    masthead_url = 'https://upload.wikimedia.org/wikipedia/commons/0/01/Andhra_Jyothi_newspaper_logo.png'
+    timefmt = ' [' + dt.strftime('%b %d, %Y') + ']'
+    description = 'Articles from the ABN Andhra Jyothy epaper, digital edition'
+    encoding = 'utf-8'
+    remove_empty_feeds = True
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        if self.output_profile.short_name.startswith('kindle'):
+            self.title = 'ఆంధ్రజ్యోతి ' + dt.strftime('%b %d, %Y')
+
+    extra_css = '''
+        .cap { text-align:center; font-size:small; }
+        img { display:block; margin:0 auto; }
+    '''
+
+    def parse_index(self):
+
+        self.log(
+            '\n***\nif this recipe fails, report it on: '
+            'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
+        )
+
+        get_edition = index + '/Home/GetEditionsHierarchy'
+        edi_data = json.loads(self.index_to_soup(get_edition, raw=True))
+        self.log('## For your local edition id, modify this recipe to match your edi_id from the cities below\n')
+        for edi in edi_data:
+            if edi['org_location'] in {'Magazines', 'Navya Daily'}:
+                continue
+            self.log(edi['org_location'])
+            cities = []
+            for edi_loc in edi['editionlocation']:
+                cities.append(edi_loc['Editionlocation'] + ' - ' + edi_loc['EditionId'])
+            self.log('\t', ',\n\t'.join(cities))
+
+        self.log('\nDownloading: Edition ID - ', edi_id)
+        url = index + '/Home/GetAllpages?editionid=' + str(edi_id) + '&editiondate=' + today
+        main_data = json.loads(self.index_to_soup(url, raw=True))
+
+        feeds_dict = defaultdict(list)
+
+        for page in main_data:
+            sec_name = page['PageNo'] + 'వ పేజీ'
+            if page['PageNumber'] == 'Page 1':
+                self.cover_url = page['HighResolution']
+            art = index + '/Home/getingRectangleObject?pageid=' + str(page['PageId'])
+            raw2 = self.index_to_soup(art, raw=True)
+            art_data = json.loads(raw2)
+            for snaps in art_data:
+                section = sec_name
+                url = str(snaps['OrgId'])
+                if snaps['ObjectType'] == 4:
+                    continue
+                feeds_dict[section].append({"title": '', "url": url})
+        return [(section, articles) for section, articles in feeds_dict.items()]
+
+    def preprocess_raw_html(self, raw, *a):
+        data = json.loads(raw)
+        body = ''
+        for x in data['StoryContent']:
+            if x['Headlines']:
+                if len(x['Headlines']) > 0:
+                    body += '<h1>' + x['Headlines'][0].replace('\n', ' ') + '</h1>'
+                    for y in x['Headlines'][1:]:
+                        body += '<h4>' + y.replace('\n', ' ') + '</h4>'
+        if data['LinkPicture']:
+            for pics in data['LinkPicture']:
+                if pics['fullpathlinkpic']:
+                    body += '<img src="{}">'.format(pics['fullpathlinkpic'])
+                    if pics['caption']:
+                        body += '<div class="cap">' + pics['caption'] + '</div>'
+        for x in data['StoryContent']:
+            if x['Body'] and x['Body'] != '':
+                body += '<span class="body">' + x['Body'] + '</span>'
+        # if data['filepathstorypic']:  # this gives you a snap image of the article from page
+        #     body += '<img src="{}">'.format(data['filepathstorypic'].replace('\\', '/'))
+        if body.strip() == '':
+            self.abort_article('no article')
+        return '<html><body><div>' + body + '</div></body></html>'
+
+    def populate_article_metadata(self, article, soup, first):
+        article.url = '***'
+        h1 = soup.find('h1')
+        h4 = soup.find('h4')
+        body = soup.find(attrs={'class':'body'})
+        if h4:
+            article.summary = self.tag_to_string(h4)
+            article.text_summary = article.summary
+        elif body:
+            article.summary = ' '.join(self.tag_to_string(body).split()[:15]) + '...'
+            article.text_summary = article.summary
+        article.title = 'ఆంధ్రజ్యోతి'
+        if h1:
+            article.title = self.tag_to_string(h1)
+        elif body:
+            article.title = ' '.join(self.tag_to_string(body).split()[:7]) + '...'
+
+    def print_version(self, url):
+        return index + '/User/ShowArticleView?OrgId=' + url
diff --git a/recipes/andhrajyothy_tel.recipe b/recipes/andhrajyothy_tel.recipe
new file mode 100644
index 0000000000..7dc6449854
--- /dev/null
+++ b/recipes/andhrajyothy_tel.recipe
@@ -0,0 +1,122 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+import json
+from datetime import date
+from collections import defaultdict
+
+# figure out your local edition id from the log of this recipe
+edi_id = 225  # TELANGANA MAIN II - 225
+
+today = date.today().strftime('%d/%m/%Y')
+
+# for older edition
+# today = '15/01/2024'
+
+day, month, year = (int(x) for x in today.split('/'))
+dt = date(year, month, day)
+today = today.replace('/', '%2F')
+
+index = 'https://epaper.andhrajyothy.com'
+
+class andhra(BasicNewsRecipe):
+    title = 'ఆంధ్రజ్యోతి - తెలంగాణ'
+    language = 'te'
+    __author__ = 'unkn0wn'
+    masthead_url = 'https://upload.wikimedia.org/wikipedia/commons/0/01/Andhra_Jyothi_newspaper_logo.png'
+    timefmt = ' [' + dt.strftime('%b %d, %Y') + ']'
+    description = 'Articles from the ABN Andhra Jyothy epaper, digital edition'
+    encoding = 'utf-8'
+    remove_empty_feeds = True
+
+    def __init__(self, *args, **kwargs):
+        BasicNewsRecipe.__init__(self, *args, **kwargs)
+        if self.output_profile.short_name.startswith('kindle'):
+            self.title = 'ఆంధ్రజ్యోతి ' + dt.strftime('%b %d, %Y')
+
+    extra_css = '''
+        .cap { text-align:center; font-size:small; }
+        img { display:block; margin:0 auto; }
+    '''
+
+    def parse_index(self):
+
+        self.log(
+            '\n***\nif this recipe fails, report it on: '
+            'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
+        )
+
+        get_edition = index + '/Home/GetEditionsHierarchy'
+        edi_data = json.loads(self.index_to_soup(get_edition, raw=True))
+        self.log('## For your local edition id, modify this recipe to match your edi_id from the cities below\n')
+        for edi in edi_data:
+            if edi['org_location'] in {'Magazines', 'Navya Daily'}:
+                continue
+            self.log(edi['org_location'])
+            cities = []
+            for edi_loc in edi['editionlocation']:
+                cities.append(edi_loc['Editionlocation'] + ' - ' + edi_loc['EditionId'])
+            self.log('\t', ',\n\t'.join(cities))
+
+        self.log('\nDownloading: Edition ID - ', edi_id)
+        url = index + '/Home/GetAllpages?editionid=' + str(edi_id) + '&editiondate=' + today
+        main_data = json.loads(self.index_to_soup(url, raw=True))
+
+        feeds_dict = defaultdict(list)
+
+        for page in main_data:
+            sec_name = page['PageNo'] + 'వ పేజీ'
+            if page['PageNumber'] == 'Page 1':
+                self.cover_url = page['HighResolution']
+            art = index + '/Home/getingRectangleObject?pageid=' + str(page['PageId'])
+            raw2 = self.index_to_soup(art, raw=True)
+            art_data = json.loads(raw2)
+            for snaps in art_data:
+                section = sec_name
+                url = str(snaps['OrgId'])
+                if snaps['ObjectType'] == 4:
+                    continue
+                feeds_dict[section].append({"title": '', "url": url})
+        return [(section, articles) for section, articles in feeds_dict.items()]
+
+    def preprocess_raw_html(self, raw, *a):
+        data = json.loads(raw)
+        body = ''
+        for x in data['StoryContent']:
+            if x['Headlines']:
+                if len(x['Headlines']) > 0:
+                    body += '<h1>' + x['Headlines'][0].replace('\n', ' ') + '</h1>'
+                    for y in x['Headlines'][1:]:
+                        body += '<h4>' + y.replace('\n', ' ') + '</h4>'
+        if data['LinkPicture']:
+            for pics in data['LinkPicture']:
+                if pics['fullpathlinkpic']:
+                    body += '<img src="{}">'.format(pics['fullpathlinkpic'])
+                    if pics['caption']:
+                        body += '<div class="cap">' + pics['caption'] + '</div>'
+        for x in data['StoryContent']:
+            if x['Body'] and x['Body'] != '':
+                body += '<span class="body">' + x['Body'] + '</span>'
+        # if data['filepathstorypic']:  # this gives you a snap image of the article from page
+        #     body += '<img src="{}">'.format(data['filepathstorypic'].replace('\\', '/'))
+        if body.strip() == '':
+            self.abort_article('no article')
+        return '<html><body><div>' + body + '</div></body></html>'
+
+    def populate_article_metadata(self, article, soup, first):
+        article.url = '***'
+        h1 = soup.find('h1')
+        h4 = soup.find('h4')
+        body = soup.find(attrs={'class':'body'})
+        if h4:
+            article.summary = self.tag_to_string(h4)
+            article.text_summary = article.summary
+        elif body:
+            article.summary = ' '.join(self.tag_to_string(body).split()[:15]) + '...'
+            article.text_summary = article.summary
+        article.title = 'ఆంధ్రజ్యోతి'
+        if h1:
+            article.title = self.tag_to_string(h1)
+        elif body:
+            article.title = ' '.join(self.tag_to_string(body).split()[:7]) + '...'
+
+    def print_version(self, url):
+        return index + '/User/ShowArticleView?OrgId=' + url
diff --git a/recipes/icons/andhrajyothy_ap.png b/recipes/icons/andhrajyothy_ap.png
new file mode 100644
index 0000000000..f0a09c537a
Binary files /dev/null and b/recipes/icons/andhrajyothy_ap.png differ
diff --git a/recipes/icons/andhrajyothy_tel.png b/recipes/icons/andhrajyothy_tel.png
new file mode 100644
index 0000000000..f0a09c537a
Binary files /dev/null and b/recipes/icons/andhrajyothy_tel.png differ
diff --git a/recipes/toiprint.recipe b/recipes/toiprint.recipe
index 3f9f503501..c4399d1bad 100644
--- a/recipes/toiprint.recipe
+++ b/recipes/toiprint.recipe
@@ -33,8 +33,7 @@ def handle_images(x, soup):
             img_div.insert_after(cap)
         else:
             x.insert_after(img_div)
-    lead = soup.find('div', attrs={'class':'lead'})
-    if lead:
+    for lead in reversed(soup.findAll('div', attrs={'class':'lead'})):
         x.insert_after(lead)
 
 class toiprint(BasicNewsRecipe):
@@ -45,6 +44,7 @@ class toiprint(BasicNewsRecipe):
     timefmt = ' [' + dt.strftime('%b %d, %Y') + ']'
     description = 'Articles from the Times of India epaper, digital edition'
     encoding = 'utf-8'
+    remove_empty_feeds = True
 
     def __init__(self, *args, **kwargs):
         BasicNewsRecipe.__init__(self, *args, **kwargs)
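
Note (outside the patch): the edi_id values hard-coded above (182 for NTR VIJAYAWADA, 225 for TELANGANA MAIN II) come from the /Home/GetEditionsHierarchy endpoint that parse_index() logs on every run. If you want to pick your local edition id without running the recipe through calibre, a minimal standalone sketch along these lines should work (assumptions: the endpoint serves the same org_location/editionlocation JSON to a plain HTTP client, and the third-party requests library stands in for calibre's own fetch machinery):

    import requests

    # Sketch: list edition ids the same way the recipes' parse_index() logs them.
    edi_data = requests.get('https://epaper.andhrajyothy.com/Home/GetEditionsHierarchy').json()
    for edi in edi_data:
        if edi['org_location'] in {'Magazines', 'Navya Daily'}:
            continue  # the recipes skip these non-newspaper groups too
        print(edi['org_location'])
        for edi_loc in edi['editionlocation']:
            print('\t' + edi_loc['Editionlocation'] + ' - ' + edi_loc['EditionId'])

Whichever id you pick replaces the edi_id constant at the top of the matching recipe.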