Commit d4dca0bb, authored 2 years ago by nicolas.ollinger
passe-moi le debug ("hand me the debug")
Parent: 397f96b2
Part of 1 merge request: !1 "Improves date behavior"
Showing 1 changed file: get_rankings/get_rankings.py, with 107 additions and 39 deletions.
 #!/usr/bin/env python3
+import logging
 from tqdm import tqdm
+from tqdm.contrib.logging import logging_redirect_tqdm
 import os
 import requests
 import datetime
@@ -12,6 +14,45 @@ import json
 import argparse
+
+LOG = logging.getLogger(__name__)
+
+
+def getwithpb(url):
+    LOG.info(f"fetching {url}")
+    r = requests.get(url, stream=True)
+    data = b""
+    total_size = int(r.headers.get("content-length", 0))
+    for chunk in tqdm(
+        r.iter_content(32 * 1024),
+        total=total_size,
+        unit="B",
+        unit_scale=True,
+        leave=False,
+    ):
+        if chunk:
+            data += chunk
+    return data
+
+
+def fgetwithpb(url, filename):
+    LOG.info(f"fetching {url}")
+    r = requests.get(url, stream=True)
+    data = b""
+    total_size = int(r.headers.get("content-length", 0))
+    with open(filename, "wb") as file:
+        for chunk in tqdm(
+            r.iter_content(32 * 1024),
+            total=total_size,
+            unit="B",
+            unit_scale=True,
+            leave=False,
+        ):
+            if chunk:
+                file.write(chunk)
+                data += chunk
+    return data
+
+
 def comp_lower(a, b):
     return isinstance(a, str) and isinstance(b, str) and a.lower() == b.lower()
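For orientation, a minimal sketch of how the two new helpers could be exercised on their own. The import path assumes the repository layout (get_rankings/get_rankings.py), and the URL and output path are placeholders, not part of the commit:

import logging

from get_rankings.get_rankings import fgetwithpb, getwithpb

logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")

url = "https://example.org/some-file"          # placeholder URL
in_memory = getwithpb(url)                     # streams the response with a byte progress bar, returns bytes
on_disk = fgetwithpb(url, "/tmp/some-file")    # same, but also writes each chunk to the given file

The only difference between the two helpers is that fgetwithpb writes each chunk to disk as it accumulates it.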
@@ -27,10 +68,7 @@ def get_dblp(url, cache=True, cache_dir=None):
     filename = "%s/%s" % (cache_dir, target.replace("/", "_"))
     os.makedirs(cache_dir, exist_ok=True)
     if not os.path.exists(filename) or not cache:
-        with open(filename, "wb") as file:
-            response = requests.get(url)
-            data = response.content
-            file.write(data)
+        data = fgetwithpb(url, filename)
     else:
         with open(filename, "rb") as file:
             data = file.read()
@@ -77,8 +115,7 @@ def get_core_rank(name, year):
             source,
         )
     )
-    response = requests.get(url)
-    data = response.content
+    data = getwithpb(url)
     cc_soup = BeautifulSoup(data, "html.parser")
     table = cc_soup.find_all("table")
     if len(table) == 0:
@@ -166,8 +203,7 @@ def get_sjr_in_cache(rankings, str_year):
 def get_sjr_rank(name):
     url = "https://www.scimagojr.com/journalsearch.php?q=%s" % name.replace(" ", "+")
-    response = requests.get(url)
-    data = response.content
+    data = getwithpb(url)
     sjr_soup = BeautifulSoup(data, "html.parser")
     revues = sjr_soup.find("div", class_="search_results")
@@ -184,8 +220,7 @@ def get_sjr_rank(name):
     if reference is None:
         return []
-    response = requests.get(reference)
-    data = response.content
+    data = getwithpb(reference)
     sjr_soup = BeautifulSoup(data, "html.parser")
     table = sjr_soup.find_all("table")
     if len(table) == 0:
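With these three hunks, every call site follows the same fetch-then-parse shape. A minimal sketch, assuming the same package layout as above and using the SJR search URL pattern from the code with a placeholder query:

from bs4 import BeautifulSoup

from get_rankings.get_rankings import getwithpb

url = "https://www.scimagojr.com/journalsearch.php?q=%s" % "nature".replace(" ", "+")  # placeholder query
data = getwithpb(url)                          # bytes, fetched with a progress bar and an INFO log line
soup = BeautifulSoup(data, "html.parser")
print(len(soup.find_all("table")))             # the real callers first check whether this is 0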
@@ -226,6 +261,22 @@ def main():
     parser.add_argument(
         "-d", action="store_true", help="display conference and journal list"
     )
+    parser.add_argument(
+        "--debug",
+        help="Print lots of debugging statements",
+        action="store_const",
+        dest="loglevel",
+        const=logging.DEBUG,
+        default=logging.WARNING,
+    )
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        help="Be verbose",
+        action="store_const",
+        dest="loglevel",
+        const=logging.INFO,
+    )
     args = parser.parse_args()
     url = args.url
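The two new flags share dest="loglevel" via action="store_const", so the effective level falls back to WARNING when neither flag is given, and the last flag on the command line wins. A standalone sketch of just that mechanism (illustration only, not part of the script):

import argparse
import logging

parser = argparse.ArgumentParser()
parser.add_argument("--debug", action="store_const", dest="loglevel",
                    const=logging.DEBUG, default=logging.WARNING)
parser.add_argument("-v", "--verbose", action="store_const", dest="loglevel",
                    const=logging.INFO)

print(parser.parse_args([]).loglevel)                  # 30: WARNING, the default
print(parser.parse_args(["-v"]).loglevel)              # 20: INFO
print(parser.parse_args(["--debug"]).loglevel)         # 10: DEBUG
print(parser.parse_args(["--debug", "-v"]).loglevel)   # 20: flags are applied left to right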
@@ -233,40 +284,57 @@ def main():
     csv_output = args.o
     start_year = args.start
     display_list = args.d
+    logging.basicConfig(level=args.loglevel, format="%(levelname)s %(message)s")
     username, elements = get_dblp(url)
     print(username)
     result = []
-    for venue, name, second_name, year in tqdm(elements):
-        if venue == "conf":
-            if (name, second_name, year) in core_ranking_caches:
-                rank = core_ranking_caches[(name, second_name, year)]
-            else:
-                rank = get_core_rank(name, year)
-                if rank is None:
-                    rank = get_core_rank(second_name, year)
-                core_ranking_caches[(name, second_name, year)] = rank
-            if rank is None:
-                result.append(["C", name, second_name, int(year), None, None, None])
-            else:
-                result.append(
-                    ["C", name, second_name, int(year), rank[1], rank[2], rank[0]]
-                )
-        else:
-            if (name, second_name) in sjr_ranking_caches:
-                rankings = sjr_ranking_caches[(name, second_name)]
-            else:
-                rankings = get_sjr_rank(name)
-                sjr_ranking_caches[(name, second_name)] = rankings
-            rank = get_sjr_in_cache(rankings, year)
-            if rank is None:
-                result.append(["J", name, second_name, int(year), None, None, None])
-            else:
-                result.append(
-                    ["J", name, second_name, int(year), rank[1], None, rank[2]]
-                )
+    with logging_redirect_tqdm():
+        for venue, name, second_name, year in tqdm(elements):
+            if start_year <= int(year) <= end_year:
+                if venue == "conf":
+                    if (name, second_name, year) in core_ranking_caches:
+                        rank = core_ranking_caches[(name, second_name, year)]
+                    else:
+                        rank = get_core_rank(name, year)
+                        if rank is None:
+                            rank = get_core_rank(second_name, year)
+                        core_ranking_caches[(name, second_name, year)] = rank
+                    if rank is None:
+                        result.append(
+                            ["C", name, second_name, int(year), None, None, None]
+                        )
+                    else:
+                        result.append(
+                            [
+                                "C",
+                                name,
+                                second_name,
+                                int(year),
+                                rank[1],
+                                rank[2],
+                                rank[0],
+                            ]
+                        )
+                elif venue == "journals":
+                    if (name, second_name) in sjr_ranking_caches:
+                        rankings = sjr_ranking_caches[(name, second_name)]
+                    else:
+                        rankings = get_sjr_rank(name)
+                        sjr_ranking_caches[(name, second_name)] = rankings
+                    rank = get_sjr_in_cache(rankings, year)
+                    if rank is None:
+                        result.append(
+                            ["J", name, second_name, int(year), None, None, None]
+                        )
+                    else:
+                        result.append(
+                            ["J", name, second_name, int(year), rank[1], None, rank[2]]
+                        )
+                else:
+                    tqdm.write(f"venue: {venue} ?")
     save_ranking_caches(sjr_ranking_caches, "sjr")
     save_ranking_caches(core_ranking_caches, "core")
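The with logging_redirect_tqdm(): block is the standard pattern from tqdm.contrib.logging: inside it, log records are emitted through tqdm so they do not tear the progress bar apart. A self-contained sketch of the behavior the loop above relies on (illustration only):

import logging
import time

from tqdm import trange
from tqdm.contrib.logging import logging_redirect_tqdm

LOG = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")

with logging_redirect_tqdm():
    for i in trange(5):
        time.sleep(0.1)
        LOG.info("step %d", i)  # routed through tqdm, so the bar is redrawn cleanly below the message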