mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-11-04 03:27:00 -05:00 
			
		
		
		
	Also retrieve plugin category when mirroring plugins
This commit is contained in:
		
							parent
							
								
									03a38da996
								
							
						
					
					
						commit
						20afcedbd8
					
				@ -56,7 +56,7 @@ PLUGINS = 'plugins.json.bz2'
 | 
				
			|||||||
INDEX = MR_URL + 'showpost.php?p=1362767&postcount=1'
 | 
					INDEX = MR_URL + 'showpost.php?p=1362767&postcount=1'
 | 
				
			||||||
# INDEX = 'file:///t/raw.html'
 | 
					# INDEX = 'file:///t/raw.html'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
IndexEntry = namedtuple('IndexEntry', 'name url donate history uninstall deprecated thread_id')
 | 
					IndexEntry = namedtuple('IndexEntry', 'name url donate history uninstall deprecated thread_id category')
 | 
				
			||||||
socket.setdefaulttimeout(30)
 | 
					socket.setdefaulttimeout(30)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -103,16 +103,33 @@ def url_to_plugin_id(url, deprecated):
 | 
				
			|||||||
def parse_index(raw=None):  # {{{
 | 
					def parse_index(raw=None):  # {{{
 | 
				
			||||||
    raw = raw or read(INDEX).decode('utf-8', 'replace')
 | 
					    raw = raw or read(INDEX).decode('utf-8', 'replace')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    dep_start = raw.index('>Deprecated/Renamed/Retired Plugins:<')
 | 
					 | 
				
			||||||
    dpat = re.compile(r'''(?is)Donate\s*:\s*<a\s+href=['"](.+?)['"]''')
 | 
					    dpat = re.compile(r'''(?is)Donate\s*:\s*<a\s+href=['"](.+?)['"]''')
 | 
				
			||||||
    key_pat = re.compile(r'''(?is)(History|Uninstall)\s*:\s*([^<;]+)[<;]''')
 | 
					    key_pat = re.compile(r'''(?is)(History|Uninstall)\s*:\s*([^<;]+)[<;]''')
 | 
				
			||||||
    seen = {}
 | 
					    seen = {}
 | 
				
			||||||
 | 
					    dep_start = -1
 | 
				
			||||||
 | 
					    category_offsets = []
 | 
				
			||||||
 | 
					    deprecated_category = 'Deprecated/Renamed/Retired'
 | 
				
			||||||
 | 
					    for match in re.finditer(r'''<b>\s*(.+?)\s*Plugins:?\s*</b>''', raw):
 | 
				
			||||||
 | 
					        category = match.group(1).strip()
 | 
				
			||||||
 | 
					        category_offsets.append((category, match.start()))
 | 
				
			||||||
 | 
					        if category == deprecated_category:
 | 
				
			||||||
 | 
					            dep_start = match.start()
 | 
				
			||||||
 | 
					    if dep_start < 1:
 | 
				
			||||||
 | 
					        raise ValueError('Could not find start of deprecated plugins')
 | 
				
			||||||
 | 
					    category_offsets = tuple(reversed(category_offsets))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def category_at(offset):
 | 
				
			||||||
 | 
					        for category, q in category_offsets:
 | 
				
			||||||
 | 
					            if offset >= q:
 | 
				
			||||||
 | 
					                return category
 | 
				
			||||||
 | 
					        raise ValueError(f'Could not find category for offset: {offset}')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for match in re.finditer(r'''(?is)<li.+?<a\s+href=['"](https://www.mobileread.com/forums/showthread.php\?[pt]=\d+).+?>(.+?)<(.+?)</li>''', raw):
 | 
					    for match in re.finditer(r'''(?is)<li.+?<a\s+href=['"](https://www.mobileread.com/forums/showthread.php\?[pt]=\d+).+?>(.+?)<(.+?)</li>''', raw):
 | 
				
			||||||
        deprecated = match.start() > dep_start
 | 
					        name, url, rest = u(match.group(2)), u(match.group(1)), match.group(3)
 | 
				
			||||||
 | 
					        category = category_at(match.start(2))
 | 
				
			||||||
 | 
					        deprecated = category == deprecated_category
 | 
				
			||||||
        donate = uninstall = None
 | 
					        donate = uninstall = None
 | 
				
			||||||
        history = False
 | 
					        history = False
 | 
				
			||||||
        name, url, rest = u(match.group(2)), u(match.group(1)), match.group(3)
 | 
					 | 
				
			||||||
        m = dpat.search(rest)
 | 
					        m = dpat.search(rest)
 | 
				
			||||||
        if m is not None:
 | 
					        if m is not None:
 | 
				
			||||||
            donate = u(m.group(1))
 | 
					            donate = u(m.group(1))
 | 
				
			||||||
@ -127,7 +144,7 @@ def parse_index(raw=None):  # {{{
 | 
				
			|||||||
        if thread_id in seen:
 | 
					        if thread_id in seen:
 | 
				
			||||||
            raise ValueError(f'thread_id for {seen[thread_id]} and {name} is the same: {thread_id}')
 | 
					            raise ValueError(f'thread_id for {seen[thread_id]} and {name} is the same: {thread_id}')
 | 
				
			||||||
        seen[thread_id] = name
 | 
					        seen[thread_id] = name
 | 
				
			||||||
        entry = IndexEntry(name, url, donate, history, uninstall, deprecated, thread_id)
 | 
					        entry = IndexEntry(name, url, donate, history, uninstall, deprecated, thread_id, category)
 | 
				
			||||||
        yield entry
 | 
					        yield entry
 | 
				
			||||||
# }}}
 | 
					# }}}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -362,6 +379,7 @@ def get_plugin_info(raw_zip):
 | 
				
			|||||||
def update_plugin_from_entry(plugin, entry):
 | 
					def update_plugin_from_entry(plugin, entry):
 | 
				
			||||||
    plugin['index_name'] = entry.name
 | 
					    plugin['index_name'] = entry.name
 | 
				
			||||||
    plugin['thread_url'] = entry.url
 | 
					    plugin['thread_url'] = entry.url
 | 
				
			||||||
 | 
					    plugin['category'] = entry.category
 | 
				
			||||||
    for x in ('donate', 'history', 'deprecated', 'uninstall', 'thread_id'):
 | 
					    for x in ('donate', 'history', 'deprecated', 'uninstall', 'thread_id'):
 | 
				
			||||||
        plugin[x] = getattr(entry, x)
 | 
					        plugin[x] = getattr(entry, x)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -650,61 +668,6 @@ def main():
 | 
				
			|||||||
        raise SystemExit(1)
 | 
					        raise SystemExit(1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def test_parse():  # {{{
 | 
					 | 
				
			||||||
    raw = read(INDEX).decode('utf-8', 'replace')
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    old_entries = []
 | 
					 | 
				
			||||||
    from lxml import html
 | 
					 | 
				
			||||||
    root = html.fromstring(raw)
 | 
					 | 
				
			||||||
    list_nodes = root.xpath('//div[@id="post_message_1362767"]/ul/li')
 | 
					 | 
				
			||||||
    # Add our deprecated plugins which are nested in a grey span
 | 
					 | 
				
			||||||
    list_nodes.extend(root.xpath('//div[@id="post_message_1362767"]/span/ul/li'))
 | 
					 | 
				
			||||||
    for list_node in list_nodes:
 | 
					 | 
				
			||||||
        name = list_node.xpath('a')[0].text_content().strip()
 | 
					 | 
				
			||||||
        url = list_node.xpath('a/@href')[0].strip()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        description_text = list_node.xpath('i')[0].text_content()
 | 
					 | 
				
			||||||
        description_parts = description_text.partition('Version:')
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        details_text = description_parts[1] + description_parts[2].replace('\r\n','')
 | 
					 | 
				
			||||||
        details_pairs = details_text.split(';')
 | 
					 | 
				
			||||||
        details = {}
 | 
					 | 
				
			||||||
        for details_pair in details_pairs:
 | 
					 | 
				
			||||||
            pair = details_pair.split(':')
 | 
					 | 
				
			||||||
            if len(pair) == 2:
 | 
					 | 
				
			||||||
                key = pair[0].strip().lower()
 | 
					 | 
				
			||||||
                value = pair[1].strip()
 | 
					 | 
				
			||||||
                details[key] = value
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        donation_node = list_node.xpath('i/span/a/@href')
 | 
					 | 
				
			||||||
        donate = donation_node[0] if donation_node else None
 | 
					 | 
				
			||||||
        uninstall = tuple(x.strip() for x in details.get('uninstall', '').strip().split(',') if x.strip()) or None
 | 
					 | 
				
			||||||
        history = details.get('history', 'No').lower() in ['yes', 'true']
 | 
					 | 
				
			||||||
        deprecated = details.get('deprecated', 'No').lower() in ['yes', 'true']
 | 
					 | 
				
			||||||
        old_entries.append(IndexEntry(name, url, donate, history, uninstall, deprecated, url_to_plugin_id(url, deprecated)))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    new_entries = tuple(parse_index(raw))
 | 
					 | 
				
			||||||
    for i, entry in enumerate(old_entries):
 | 
					 | 
				
			||||||
        if entry != new_entries[i]:
 | 
					 | 
				
			||||||
            print(f'The new entry: {new_entries[i]} != {entry}')
 | 
					 | 
				
			||||||
            raise SystemExit(1)
 | 
					 | 
				
			||||||
    pool = ThreadPool(processes=20)
 | 
					 | 
				
			||||||
    urls = [e.url for e in new_entries]
 | 
					 | 
				
			||||||
    data = pool.map(read, urls)
 | 
					 | 
				
			||||||
    for url, raw in zip(urls, data):
 | 
					 | 
				
			||||||
        sys.stdout.flush()
 | 
					 | 
				
			||||||
        root = html.fromstring(raw)
 | 
					 | 
				
			||||||
        attachment_nodes = root.xpath('//fieldset/table/tr/td/a')
 | 
					 | 
				
			||||||
        full_url = None
 | 
					 | 
				
			||||||
        for attachment_node in attachment_nodes:
 | 
					 | 
				
			||||||
            filename = attachment_node.text_content().lower()
 | 
					 | 
				
			||||||
            if filename.find('.zip') != -1:
 | 
					 | 
				
			||||||
                full_url = MR_URL + attachment_node.attrib['href']
 | 
					 | 
				
			||||||
                break
 | 
					 | 
				
			||||||
        new_url, aname = parse_plugin_zip_url(raw)
 | 
					 | 
				
			||||||
        if new_url != full_url:
 | 
					 | 
				
			||||||
            print(f'new url ({aname}): {new_url} != {full_url} for plugin at: {url}')
 | 
					 | 
				
			||||||
            raise SystemExit(1)
 | 
					 | 
				
			||||||
# }}}
 | 
					# }}}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -746,4 +709,6 @@ if __name__ == '__main__':
 | 
				
			|||||||
    # import pprint
 | 
					    # import pprint
 | 
				
			||||||
    # pprint.pprint(get_plugin_info(open(sys.argv[-1], 'rb').read()))
 | 
					    # pprint.pprint(get_plugin_info(open(sys.argv[-1], 'rb').read()))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # print('\n'.join(map(str, parse_index())))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    main()
 | 
					    main()
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user