Skip to content
Snippets Groups Projects
Commit d4dca0bb authored by nicolas.ollinger's avatar nicolas.ollinger
Browse files

passe-moi le debug

parent 397f96b2
No related branches found
No related tags found
1 merge request: !1 "Improves date behavior"
#!/usr/bin/env python3 #!/usr/bin/env python3
import logging
from tqdm import tqdm from tqdm import tqdm
from tqdm.contrib.logging import logging_redirect_tqdm
import os import os
import requests import requests
import datetime import datetime
...@@ -12,6 +14,45 @@ import json ...@@ -12,6 +14,45 @@ import json
import argparse import argparse
LOG = logging.getLogger(__name__)
def getwithpb(url):
    """Fetch *url* while showing a byte-based progress bar.

    The response is streamed in 32 KiB chunks and the progress bar is
    advanced by the number of bytes actually received.

    Args:
        url: Address to download with an HTTP GET request.

    Returns:
        The complete response body as ``bytes``.
    """
    LOG.info("fetching %s", url)
    chunks = []
    # Using the response as a context manager releases the connection even
    # when the transfer is interrupted.
    with requests.get(url, stream=True) as r:
        total_size = int(r.headers.get("content-length", 0))
        with tqdm(
            # No Content-Length header: let tqdm display an unknown total
            # instead of a bar that is "complete" at 0 bytes.
            total=total_size or None,
            unit="B",
            unit_scale=True,
            leave=False,
        ) as bar:
            for chunk in r.iter_content(32 * 1024):
                if chunk:
                    chunks.append(chunk)
                    # The bar counts bytes, so advance by the chunk size
                    # rather than by one step per chunk.
                    bar.update(len(chunk))
    # Join once at the end instead of repeatedly growing a bytes object.
    return b"".join(chunks)
def fgetwithpb(url, filename):
    """Download *url* into *filename* with a progress bar and return the body.

    The body is streamed in 32 KiB chunks to a temporary ``<filename>.part``
    file which is renamed to *filename* only once the transfer has finished,
    so an interrupted download never leaves a truncated file at *filename*.

    Args:
        url: Address to download with an HTTP GET request.
        filename: Path of the file that receives the downloaded bytes.

    Returns:
        The complete response body as ``bytes``.
    """
    LOG.info("fetching %s", url)
    chunks = []
    partial = f"{filename}.part"
    try:
        # Using the response as a context manager releases the connection
        # even when the transfer is interrupted.
        with requests.get(url, stream=True) as r:
            total_size = int(r.headers.get("content-length", 0))
            with open(partial, "wb") as file, tqdm(
                # No Content-Length header: show an unknown total rather
                # than a bar that is "complete" at 0 bytes.
                total=total_size or None,
                unit="B",
                unit_scale=True,
                leave=False,
            ) as bar:
                for chunk in r.iter_content(32 * 1024):
                    if chunk:
                        file.write(chunk)
                        chunks.append(chunk)
                        # The bar counts bytes, not chunks.
                        bar.update(len(chunk))
        # Publish the file only after every chunk has been written.
        os.replace(partial, filename)
    finally:
        # After a successful rename the partial file no longer exists; after
        # a failure it is removed so no stale data is left behind.
        if os.path.exists(partial):
            os.remove(partial)
    # Join once at the end instead of repeatedly growing a bytes object.
    return b"".join(chunks)
def comp_lower(a, b):
    """Return True when *a* and *b* are both strings that match ignoring case.

    Args:
        a: First value to compare.
        b: Second value to compare.

    Returns:
        ``True`` if both arguments are ``str`` instances whose lowercase
        forms are equal, ``False`` otherwise (including when either
        argument is not a string).
    """
    if not (isinstance(a, str) and isinstance(b, str)):
        return False
    return a.lower() == b.lower()
...@@ -27,10 +68,7 @@ def get_dblp(url, cache=True, cache_dir=None): ...@@ -27,10 +68,7 @@ def get_dblp(url, cache=True, cache_dir=None):
filename = "%s/%s" % (cache_dir, target.replace("/", "_")) filename = "%s/%s" % (cache_dir, target.replace("/", "_"))
os.makedirs(cache_dir, exist_ok=True) os.makedirs(cache_dir, exist_ok=True)
if not os.path.exists(filename) or not cache: if not os.path.exists(filename) or not cache:
with open(filename, "wb") as file: data = fgetwithpb(url, filename)
response = requests.get(url)
data = response.content
file.write(data)
else: else:
with open(filename, "rb") as file: with open(filename, "rb") as file:
data = file.read() data = file.read()
...@@ -77,8 +115,7 @@ def get_core_rank(name, year): ...@@ -77,8 +115,7 @@ def get_core_rank(name, year):
source, source,
) )
response = requests.get(url) data = getwithpb(url)
data = response.content
cc_soup = BeautifulSoup(data, "html.parser") cc_soup = BeautifulSoup(data, "html.parser")
table = cc_soup.find_all("table") table = cc_soup.find_all("table")
if len(table) == 0: if len(table) == 0:
...@@ -166,8 +203,7 @@ def get_sjr_in_cache(rankings, str_year): ...@@ -166,8 +203,7 @@ def get_sjr_in_cache(rankings, str_year):
def get_sjr_rank(name): def get_sjr_rank(name):
url = "https://www.scimagojr.com/journalsearch.php?q=%s" % name.replace(" ", "+") url = "https://www.scimagojr.com/journalsearch.php?q=%s" % name.replace(" ", "+")
response = requests.get(url) data = getwithpb(url)
data = response.content
sjr_soup = BeautifulSoup(data, "html.parser") sjr_soup = BeautifulSoup(data, "html.parser")
revues = sjr_soup.find("div", class_="search_results") revues = sjr_soup.find("div", class_="search_results")
...@@ -184,8 +220,7 @@ def get_sjr_rank(name): ...@@ -184,8 +220,7 @@ def get_sjr_rank(name):
if reference is None: if reference is None:
return [] return []
response = requests.get(reference) data = getwithpb(reference)
data = response.content
sjr_soup = BeautifulSoup(data, "html.parser") sjr_soup = BeautifulSoup(data, "html.parser")
table = sjr_soup.find_all("table") table = sjr_soup.find_all("table")
if len(table) == 0: if len(table) == 0:
...@@ -226,6 +261,22 @@ def main(): ...@@ -226,6 +261,22 @@ def main():
parser.add_argument( parser.add_argument(
"-d", action="store_true", help="display conference and journal list" "-d", action="store_true", help="display conference and journal list"
) )
parser.add_argument(
"--debug",
help="Print lots of debugging statements",
action="store_const",
dest="loglevel",
const=logging.DEBUG,
default=logging.WARNING,
)
parser.add_argument(
"-v",
"--verbose",
help="Be verbose",
action="store_const",
dest="loglevel",
const=logging.INFO,
)
args = parser.parse_args() args = parser.parse_args()
url = args.url url = args.url
...@@ -233,40 +284,57 @@ def main(): ...@@ -233,40 +284,57 @@ def main():
csv_output = args.o csv_output = args.o
start_year = args.start start_year = args.start
display_list = args.d display_list = args.d
logging.basicConfig(level=args.loglevel, format="%(levelname)s %(message)s")
username, elements = get_dblp(url) username, elements = get_dblp(url)
print(username) print(username)
result = [] result = []
for venue, name, second_name, year in tqdm(elements): with logging_redirect_tqdm():
if venue == "conf": for venue, name, second_name, year in tqdm(elements):
if (name, second_name, year) in core_ranking_caches: if start_year <= int(year) <= end_year:
rank = core_ranking_caches[(name, second_name, year)] if venue == "conf":
else: if (name, second_name, year) in core_ranking_caches:
rank = get_core_rank(name, year) rank = core_ranking_caches[(name, second_name, year)]
if rank is None: else:
rank = get_core_rank(second_name, year) rank = get_core_rank(name, year)
core_ranking_caches[(name, second_name, year)] = rank if rank is None:
if rank is None: rank = get_core_rank(second_name, year)
result.append(["C", name, second_name, int(year), None, None, None]) core_ranking_caches[(name, second_name, year)] = rank
else: if rank is None:
result.append( result.append(
["C", name, second_name, int(year), rank[1], rank[2], rank[0]] ["C", name, second_name, int(year), None, None, None]
) )
else:
else: result.append(
if (name, second_name) in sjr_ranking_caches: [
rankings = sjr_ranking_caches[(name, second_name)] "C",
else: name,
rankings = get_sjr_rank(name) second_name,
sjr_ranking_caches[(name, second_name)] = rankings int(year),
rank = get_sjr_in_cache(rankings, year) rank[1],
if rank is None: rank[2],
result.append(["J", name, second_name, int(year), None, None, None]) rank[0],
else: ]
result.append( )
["J", name, second_name, int(year), rank[1], None, rank[2]]
) elif venue == "journals":
if (name, second_name) in sjr_ranking_caches:
rankings = sjr_ranking_caches[(name, second_name)]
else:
rankings = get_sjr_rank(name)
sjr_ranking_caches[(name, second_name)] = rankings
rank = get_sjr_in_cache(rankings, year)
if rank is None:
result.append(
["J", name, second_name, int(year), None, None, None]
)
else:
result.append(
["J", name, second_name, int(year), rank[1], None, rank[2]]
)
else:
tqdm.write(f"venue: {venue} ?")
save_ranking_caches(sjr_ranking_caches, "sjr") save_ranking_caches(sjr_ranking_caches, "sjr")
save_ranking_caches(core_ranking_caches, "core") save_ranking_caches(core_ranking_caches, "core")
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment