diff --git a/get_rankings/get_rankings.py b/get_rankings/get_rankings.py index 206fa7480ba43e2f9dae0a8ed849702be8c58346..a0f77f3693fd9ec42e57805127352969c5852262 100755 --- a/get_rankings/get_rankings.py +++ b/get_rankings/get_rankings.py @@ -4,12 +4,14 @@ import logging from tqdm import tqdm from tqdm.contrib.logging import logging_redirect_tqdm import os +import sys import datetime from dateutil.parser import parse as parsedate from bs4 import BeautifulSoup import pandas as pd import argparse import re +import shutil from get_rankings.hash_cache import load_hash_caches, save_hash_caches, default_cache from get_rankings.tools import levenshtein, download, get_in_ordered_list @@ -48,6 +50,8 @@ def get_dblp(url, cache=True, cache_dir=None): def get_core_year(year): + if year >= 2023: + return "CORE2023" if year >= 2021: return "CORE2021" if year >= 2020: @@ -98,9 +102,12 @@ class Sjr: data = download("https://dblp.org/db/journals/%s/index.html" % acronym) soup = BeautifulSoup(data, "html.parser") full_name = soup.find("h1").text - issn = soup.find( - "a", attrs={"href": re.compile("^https://portal.issn.org/resource/ISSN/")} - ).text + try: + issn = soup.find( + "a", attrs={"href": re.compile("^https://portal.issn.org/resource/ISSN/")} + ).text + except: + issn = None return (full_name, issn) def get(self, name, second_name, year): @@ -110,13 +117,15 @@ class Sjr: _ , issn = self.get_issn(second_name) rankings = self.get_sjr_rank(issn) self.ranking_caches[(name, second_name)] = rankings - rank = get_in_ordered_list(rankings, int(year)) + rank = None if rankings is None else get_in_ordered_list(rankings, int(year)) if rank is None: return ["J", name, second_name, int(year), None, None, None] else: return ["J", name, second_name, int(year), rank[1], None, rank[2]] def get_sjr_rank(self, name): + if name is None: + return None url = "https://www.scimagojr.com/journalsearch.php?q=%s" % name.replace( " ", "+" ) @@ -172,7 +181,7 @@ def main(): parser = 
argparse.ArgumentParser( description="Get ranking from DBLP and show a small summary" ) - parser.add_argument("url", help="DBLP url") + parser.add_argument("url", help="DBLP url (or use clear-cache to clear the cache, it should be done regularly)") parser.add_argument("--start", type=int, default=-1, help="starting year") parser.add_argument("--end", type=int, default=10000, help="ending year") parser.add_argument( @@ -206,6 +215,13 @@ def main(): display_list = args.d logging.basicConfig(level=args.loglevel, format="%(levelname)s %(message)s") + if args.url == 'clear-cache': + cache_dir = default_cache() + print("Cleaning the cache :", cache_dir); + shutil.rmtree(cache_dir) + print("Cache clear"); + sys.exit(0) + username, elements = get_dblp(url) # Keeps only elements in the requested range diff --git a/setup.py b/setup.py index 16a6016063d6abebda1c3061b365449ada02f857..caf15888f21d551e191b2dbcfed7581789ea8265 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ with open("README.md", "r") as fh: setuptools.setup( name="get_rankings", - version="0.8", + version="0.9", author="Georges Da Costa", author_email="georges.da-costa@irit.fr", description="DBLP ranking using CORE Rank and SJR",