Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
B
batsched
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
poquet
millian
batsched
Commits
767d3220
Commit
767d3220
authored
1 year ago
by
Millian Poquet
Browse files
Options
Downloads
Plain Diff
Merge branch 'easy_bf_fast_walltime'
parents
058fd1ef
0bb0e37f
Branches
Branches containing commit
No related tags found
No related merge requests found
Pipeline
#8234
failed
1 year ago
Stage: big_stage
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
.gitignore
+1
-0
1 addition, 0 deletions
.gitignore
src/algo/easy_bf_fast.cpp
+45
-28
45 additions, 28 deletions
src/algo/easy_bf_fast.cpp
src/algo/easy_bf_fast.hpp
+3
-1
3 additions, 1 deletion
src/algo/easy_bf_fast.hpp
with
49 additions
and
29 deletions
.gitignore
+
1
−
0
View file @
767d3220
...
@@ -5,3 +5,4 @@ test-instances
...
@@ -5,3 +5,4 @@ test-instances
cover
cover
result
result
.vscode
.vscode
.cache
This diff is collapsed.
Click to expand it.
src/algo/easy_bf_fast.cpp
+
45
−
28
View file @
767d3220
...
@@ -49,8 +49,11 @@ void EasyBackfillingFast::make_decisions(double date,
...
@@ -49,8 +49,11 @@ void EasyBackfillingFast::make_decisions(double date,
// - only handles one priority job (the first of the queue)
// - only handles one priority job (the first of the queue)
// - only handles time as floating-point (-> precision errors).
// - only handles time as floating-point (-> precision errors).
// Hacks:
// Warning: you might obtain different outputs than with easy_bf. This is
// - uses priority job's completion time to store its expected starting time
// due to the fact that this version only keeps track of the priority job
// expected start time and the number of machines available then, while
// easy_bf keeps track of a full 2D schedule of the future. easy_bf_fast
// will sometimes be a little more greedy in backfilling.
bool
job_ended
=
false
;
bool
job_ended
=
false
;
...
@@ -121,7 +124,7 @@ void EasyBackfillingFast::make_decisions(double date,
...
@@ -121,7 +124,7 @@ void EasyBackfillingFast::make_decisions(double date,
{
{
// The job becomes priority!
// The job becomes priority!
_priority_job
=
pending_job
;
_priority_job
=
pending_job
;
_priority_job
->
completion_time
=
compu
te_priority_job_expected_earliest_start
ing
_time
();
upda
te_priority_job_expected_earliest_start_time
();
_pending_jobs
.
erase
(
job_it
);
_pending_jobs
.
erase
(
job_it
);
// Stop first queue traversal.
// Stop first queue traversal.
...
@@ -133,13 +136,19 @@ void EasyBackfillingFast::make_decisions(double date,
...
@@ -133,13 +136,19 @@ void EasyBackfillingFast::make_decisions(double date,
// Backfill jobs that do not hinder the priority job.
// Backfill jobs that do not hinder the priority job.
if
(
_nb_available_machines
>
0
)
if
(
_nb_available_machines
>
0
)
{
{
// Update priority job expected starting time (might have changed if a recently ended job
// completed before its walltime)
if
(
_priority_job
!=
nullptr
)
update_priority_job_expected_earliest_start_time
();
for
(
auto
job_it
=
_pending_jobs
.
begin
();
for
(
auto
job_it
=
_pending_jobs
.
begin
();
job_it
!=
_pending_jobs
.
end
();
)
job_it
!=
_pending_jobs
.
end
();
)
{
{
const
Job
*
pending_job
=
*
job_it
;
const
Job
*
pending_job
=
*
job_it
;
// Can the job be executed now ?
// Can the job be executed now (without hindering priority job)?
if
(
pending_job
->
nb_requested_resources
<=
_nb_available_machines
&&
if
(
pending_job
->
nb_requested_resources
<=
_nb_available_machines
&&
date
+
pending_job
->
walltime
<=
_priority_job
->
completion_time
)
(
date
+
pending_job
->
walltime
<=
_priority_job_expected_start_time
||
pending_job
->
nb_requested_resources
<=
_remaining_resources_at_priority_job_start
))
{
{
// Yes, it can be backfilled!
// Yes, it can be backfilled!
alloc
.
machines
=
_available_machines
.
left
(
alloc
.
machines
=
_available_machines
.
left
(
...
@@ -156,6 +165,8 @@ void EasyBackfillingFast::make_decisions(double date,
...
@@ -156,6 +165,8 @@ void EasyBackfillingFast::make_decisions(double date,
_nb_available_machines
-=
pending_job
->
nb_requested_resources
;
_nb_available_machines
-=
pending_job
->
nb_requested_resources
;
_current_allocations
[
pending_job
->
id
]
=
alloc
;
_current_allocations
[
pending_job
->
id
]
=
alloc
;
job_it
=
_pending_jobs
.
erase
(
job_it
);
job_it
=
_pending_jobs
.
erase
(
job_it
);
if
(
date
+
pending_job
->
walltime
>
_priority_job_expected_start_time
)
_remaining_resources_at_priority_job_start
-=
pending_job
->
nb_requested_resources
;
// Directly get out of the backfilling loop if all machines are busy.
// Directly get out of the backfilling loop if all machines are busy.
if
(
_nb_available_machines
<=
0
)
if
(
_nb_available_machines
<=
0
)
...
@@ -175,13 +186,25 @@ void EasyBackfillingFast::make_decisions(double date,
...
@@ -175,13 +186,25 @@ void EasyBackfillingFast::make_decisions(double date,
{
{
Job
*
new_job
=
(
*
_workload
)[
new_job_id
];
Job
*
new_job
=
(
*
_workload
)[
new_job_id
];
// Is the job valid on this platform?
if
(
new_job
->
nb_requested_resources
>
_nb_machines
)
{
_decision
->
add_reject_job
(
new_job_id
,
date
);
}
else
if
(
!
new_job
->
has_walltime
)
{
_decision
->
add_reject_job
(
new_job_id
,
date
);
}
// Can the job be executed right now?
// Can the job be executed right now?
if
(
new_job
->
nb_requested_resources
<=
_nb_available_machines
)
else
if
(
new_job
->
nb_requested_resources
<=
_nb_available_machines
)
{
{
//LOG_F(INFO, "There are enough available resources (%d) to execute job %s", _nb_available_machines, new_job->id.c_str());
//LOG_F(INFO, "There are enough available resources (%d) to execute job %s", _nb_available_machines, new_job->id.c_str());
// Can it be executed now (without hindering priority job?)
// Can it be executed now (without hindering priority job)?
if
(
_priority_job
==
nullptr
||
if
(
_priority_job
==
nullptr
||
date
+
new_job
->
walltime
<=
_priority_job
->
completion_time
)
date
+
new_job
->
walltime
<=
_priority_job_expected_start_time
||
new_job
->
nb_requested_resources
<=
_remaining_resources_at_priority_job_start
)
{
{
//LOG_F(INFO, "Job %s can be started right away!", new_job->id.c_str());
//LOG_F(INFO, "Job %s can be started right away!", new_job->id.c_str());
// Yes, the job can be executed right away!
// Yes, the job can be executed right away!
...
@@ -200,12 +223,14 @@ void EasyBackfillingFast::make_decisions(double date,
...
@@ -200,12 +223,14 @@ void EasyBackfillingFast::make_decisions(double date,
_available_machines
-=
alloc
.
machines
;
_available_machines
-=
alloc
.
machines
;
_nb_available_machines
-=
new_job
->
nb_requested_resources
;
_nb_available_machines
-=
new_job
->
nb_requested_resources
;
_current_allocations
[
new_job_id
]
=
alloc
;
_current_allocations
[
new_job_id
]
=
alloc
;
if
(
_priority_job
!=
nullptr
&&
date
+
new_job
->
walltime
>
_priority_job_expected_start_time
)
_remaining_resources_at_priority_job_start
-=
new_job
->
nb_requested_resources
;
}
}
else
else
{
{
// No, the job cannot be executed (hinders priority job.)
// No, the job cannot be executed (hinders priority job.)
/*LOG_F(INFO, "Not enough time to execute job %s (walltime=%g, priority job expected starting time=%g)",
/*LOG_F(INFO, "Not enough time to execute job %s (walltime=%g, priority job expected starting time=%g)",
new_job->id.c_str(), (double)new_job->walltime, _priority_job->completion_time);*/
new_job->id.c_str(), (double)new_job->walltime, _priority_job_expected_start_time);*/
_pending_jobs
.
push_back
(
new_job
);
_pending_jobs
.
push_back
(
new_job
);
}
}
}
}
...
@@ -213,32 +238,22 @@ void EasyBackfillingFast::make_decisions(double date,
...
@@ -213,32 +238,22 @@ void EasyBackfillingFast::make_decisions(double date,
{
{
// The job is too big to fit now.
// The job is too big to fit now.
// Is the job valid on this platform?
if
(
_priority_job
==
nullptr
)
if
(
new_job
->
nb_requested_resources
>
_nb_machines
)
{
{
/
*LOG_F(INFO, "Rejecing job %s (required %d machines, while platform size is %d)",
/
/ The job becomes priority.
new_job->id.c_str(), new_job->nb_requested_resources, _nb_machines);*/
_priority_job
=
new_job
;
_decision
->
add_reject_job
(
new_job_id
,
date
);
update_priority_job_expected_earliest_start_time
(
);
}
}
else
else
{
{
if
(
_priority_job
==
nullptr
)
// The job is queued up.
{
_pending_jobs
.
push_back
(
new_job
);
// The job becomes priority.
_priority_job
=
new_job
;
_priority_job
->
completion_time
=
compute_priority_job_expected_earliest_starting_time
();
}
else
{
// The job is queued up.
_pending_jobs
.
push_back
(
new_job
);
}
}
}
}
}
}
}
}
}
double
EasyBackfillingFast
::
compu
te_priority_job_expected_earliest_start
ing
_time
()
void
EasyBackfillingFast
::
upda
te_priority_job_expected_earliest_start_time
()
{
{
int
nb_available
=
_nb_available_machines
;
int
nb_available
=
_nb_available_machines
;
int
required
=
_priority_job
->
nb_requested_resources
;
int
required
=
_priority_job
->
nb_requested_resources
;
...
@@ -249,12 +264,14 @@ double EasyBackfillingFast::compute_priority_job_expected_earliest_starting_time
...
@@ -249,12 +264,14 @@ double EasyBackfillingFast::compute_priority_job_expected_earliest_starting_time
if
(
nb_available
>=
required
)
if
(
nb_available
>=
required
)
{
{
return
it
->
date
;
_priority_job_expected_start_time
=
it
->
date
;
_remaining_resources_at_priority_job_start
=
nb_available
-
required
;
return
;
}
}
}
}
PPK_ASSERT_ERROR
(
false
,
"The job will never be executable."
);
PPK_ASSERT_ERROR
(
false
,
"The job will never be executable."
);
return
0
;
return
;
}
}
std
::
list
<
EasyBackfillingFast
::
FinishedHorizonPoint
>::
iterator
EasyBackfillingFast
::
insert_horizon_point
(
const
EasyBackfillingFast
::
FinishedHorizonPoint
&
point
)
std
::
list
<
EasyBackfillingFast
::
FinishedHorizonPoint
>::
iterator
EasyBackfillingFast
::
insert_horizon_point
(
const
EasyBackfillingFast
::
FinishedHorizonPoint
&
point
)
...
...
This diff is collapsed.
Click to expand it.
src/algo/easy_bf_fast.hpp
+
3
−
1
View file @
767d3220
...
@@ -39,7 +39,7 @@ private:
...
@@ -39,7 +39,7 @@ private:
};
};
private
:
private
:
double
compu
te_priority_job_expected_earliest_start
ing
_time
();
void
upda
te_priority_job_expected_earliest_start_time
();
std
::
list
<
FinishedHorizonPoint
>::
iterator
insert_horizon_point
(
const
FinishedHorizonPoint
&
point
);
std
::
list
<
FinishedHorizonPoint
>::
iterator
insert_horizon_point
(
const
FinishedHorizonPoint
&
point
);
private
:
private
:
...
@@ -59,4 +59,6 @@ private:
...
@@ -59,4 +59,6 @@ private:
// At any time, null if there is no priority job (no waiting job)
// At any time, null if there is no priority job (no waiting job)
Job
*
_priority_job
=
nullptr
;
Job
*
_priority_job
=
nullptr
;
double
_priority_job_expected_start_time
=
-
1
;
int
_remaining_resources_at_priority_job_start
=
-
1
;
};
};
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment