Skip to content
Snippets Groups Projects
Commit 21248554 authored by Georges Da Costa
Browse files

Updates core rank to 2023, removes a bug concerning certain old journals, adds...

Updates core rank to 2023, removes a bug concerning certain old journals, adds capability to clean the cache
parent af03b3c2
Branches
No related tags found
No related merge requests found
......@@ -4,12 +4,14 @@ import logging
from tqdm import tqdm
from tqdm.contrib.logging import logging_redirect_tqdm
import os
import sys
import datetime
from dateutil.parser import parse as parsedate
from bs4 import BeautifulSoup
import pandas as pd
import argparse
import re
import shutil
from get_rankings.hash_cache import load_hash_caches, save_hash_caches, default_cache
from get_rankings.tools import levenshtein, download, get_in_ordered_list
......@@ -48,6 +50,8 @@ def get_dblp(url, cache=True, cache_dir=None):
def get_core_year(year):
if year >= 2023:
return "CORE2023"
if year >= 2021:
return "CORE2021"
if year >= 2020:
......@@ -98,9 +102,12 @@ class Sjr:
data = download("https://dblp.org/db/journals/%s/index.html" % acronym)
soup = BeautifulSoup(data, "html.parser")
full_name = soup.find("h1").text
issn = soup.find(
"a", attrs={"href": re.compile("^https://portal.issn.org/resource/ISSN/")}
).text
try:
issn = soup.find(
"a", attrs={"href": re.compile("^https://portal.issn.org/resource/ISSN/")}
).text
except:
issn = None
return (full_name, issn)
def get(self, name, second_name, year):
......@@ -110,13 +117,15 @@ class Sjr:
_ , issn = self.get_issn(second_name)
rankings = self.get_sjr_rank(issn)
self.ranking_caches[(name, second_name)] = rankings
rank = get_in_ordered_list(rankings, int(year))
rank = None if rankings is None else get_in_ordered_list(rankings, int(year))
if rank is None:
return ["J", name, second_name, int(year), None, None, None]
else:
return ["J", name, second_name, int(year), rank[1], None, rank[2]]
def get_sjr_rank(self, name):
if name is None:
return None
url = "https://www.scimagojr.com/journalsearch.php?q=%s" % name.replace(
" ", "+"
)
......@@ -172,7 +181,7 @@ def main():
parser = argparse.ArgumentParser(
description="Get ranking from DBLP and show a small summary"
)
parser.add_argument("url", help="DBLP url")
parser.add_argument("url", help="DBLP url (or use clear-cache to clear the cache, is should be done regularly)")
parser.add_argument("--start", type=int, default=-1, help="starting year")
parser.add_argument("--end", type=int, default=10000, help="ending year")
parser.add_argument(
......@@ -206,6 +215,13 @@ def main():
display_list = args.d
logging.basicConfig(level=args.loglevel, format="%(levelname)s %(message)s")
if args.url == 'clear-cache':
cache_dir = default_cache()
print("Cleaning the cache :", cache_dir);
shutil.rmtree(cache_dir)
print("Cache clear");
sys.exit(0)
username, elements = get_dblp(url)
# Keeps only elements in the requested range
......
......@@ -5,7 +5,7 @@ with open("README.md", "r") as fh:
setuptools.setup(
name="get_rankings",
version="0.8",
version="0.9",
author="Georges Da Costa",
author_email="georges.da-costa@irit.fr",
description="DBLP ranking using CORE Rank and SJR",
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment