Skip to content
Snippets Groups Projects
Commit 21248554 authored by Georges Da Costa's avatar Georges Da Costa
Browse files

Updates core rank to 2023, removes a bug concerning certain old journals, adds...

Updates core rank to 2023, removes a bug concerning certain old journals, adds capability to clean the cache
parent af03b3c2
Branches
No related tags found
No related merge requests found
...@@ -4,12 +4,14 @@ import logging ...@@ -4,12 +4,14 @@ import logging
from tqdm import tqdm from tqdm import tqdm
from tqdm.contrib.logging import logging_redirect_tqdm from tqdm.contrib.logging import logging_redirect_tqdm
import os import os
import sys
import datetime import datetime
from dateutil.parser import parse as parsedate from dateutil.parser import parse as parsedate
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import pandas as pd import pandas as pd
import argparse import argparse
import re import re
import shutil
from get_rankings.hash_cache import load_hash_caches, save_hash_caches, default_cache from get_rankings.hash_cache import load_hash_caches, save_hash_caches, default_cache
from get_rankings.tools import levenshtein, download, get_in_ordered_list from get_rankings.tools import levenshtein, download, get_in_ordered_list
...@@ -48,6 +50,8 @@ def get_dblp(url, cache=True, cache_dir=None): ...@@ -48,6 +50,8 @@ def get_dblp(url, cache=True, cache_dir=None):
def get_core_year(year): def get_core_year(year):
if year >= 2023:
return "CORE2023"
if year >= 2021: if year >= 2021:
return "CORE2021" return "CORE2021"
if year >= 2020: if year >= 2020:
...@@ -98,9 +102,12 @@ class Sjr: ...@@ -98,9 +102,12 @@ class Sjr:
data = download("https://dblp.org/db/journals/%s/index.html" % acronym) data = download("https://dblp.org/db/journals/%s/index.html" % acronym)
soup = BeautifulSoup(data, "html.parser") soup = BeautifulSoup(data, "html.parser")
full_name = soup.find("h1").text full_name = soup.find("h1").text
issn = soup.find( try:
"a", attrs={"href": re.compile("^https://portal.issn.org/resource/ISSN/")} issn = soup.find(
).text "a", attrs={"href": re.compile("^https://portal.issn.org/resource/ISSN/")}
).text
except:
issn = None
return (full_name, issn) return (full_name, issn)
def get(self, name, second_name, year): def get(self, name, second_name, year):
...@@ -110,13 +117,15 @@ class Sjr: ...@@ -110,13 +117,15 @@ class Sjr:
_ , issn = self.get_issn(second_name) _ , issn = self.get_issn(second_name)
rankings = self.get_sjr_rank(issn) rankings = self.get_sjr_rank(issn)
self.ranking_caches[(name, second_name)] = rankings self.ranking_caches[(name, second_name)] = rankings
rank = get_in_ordered_list(rankings, int(year)) rank = None if rankings is None else get_in_ordered_list(rankings, int(year))
if rank is None: if rank is None:
return ["J", name, second_name, int(year), None, None, None] return ["J", name, second_name, int(year), None, None, None]
else: else:
return ["J", name, second_name, int(year), rank[1], None, rank[2]] return ["J", name, second_name, int(year), rank[1], None, rank[2]]
def get_sjr_rank(self, name): def get_sjr_rank(self, name):
if name is None:
return None
url = "https://www.scimagojr.com/journalsearch.php?q=%s" % name.replace( url = "https://www.scimagojr.com/journalsearch.php?q=%s" % name.replace(
" ", "+" " ", "+"
) )
...@@ -172,7 +181,7 @@ def main(): ...@@ -172,7 +181,7 @@ def main():
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="Get ranking from DBLP and show a small summary" description="Get ranking from DBLP and show a small summary"
) )
parser.add_argument("url", help="DBLP url") parser.add_argument("url", help="DBLP url (or use clear-cache to clear the cache, is should be done regularly)")
parser.add_argument("--start", type=int, default=-1, help="starting year") parser.add_argument("--start", type=int, default=-1, help="starting year")
parser.add_argument("--end", type=int, default=10000, help="ending year") parser.add_argument("--end", type=int, default=10000, help="ending year")
parser.add_argument( parser.add_argument(
...@@ -206,6 +215,13 @@ def main(): ...@@ -206,6 +215,13 @@ def main():
display_list = args.d display_list = args.d
logging.basicConfig(level=args.loglevel, format="%(levelname)s %(message)s") logging.basicConfig(level=args.loglevel, format="%(levelname)s %(message)s")
if args.url == 'clear-cache':
cache_dir = default_cache()
print("Cleaning the cache :", cache_dir);
shutil.rmtree(cache_dir)
print("Cache clear");
sys.exit(0)
username, elements = get_dblp(url) username, elements = get_dblp(url)
# Keeps only elements in the requested range # Keeps only elements in the requested range
......
...@@ -5,7 +5,7 @@ with open("README.md", "r") as fh: ...@@ -5,7 +5,7 @@ with open("README.md", "r") as fh:
setuptools.setup( setuptools.setup(
name="get_rankings", name="get_rankings",
version="0.8", version="0.9",
author="Georges Da Costa", author="Georges Da Costa",
author_email="georges.da-costa@irit.fr", author_email="georges.da-costa@irit.fr",
description="DBLP ranking using CORE Rank and SJR", description="DBLP ranking using CORE Rank and SJR",
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment