# test_distance.py
from distance_batsim_output import *
import pytest

# Small hand-written series shared by the unit tests below.
empty = pd.Series(data=[], dtype="float64")   # no elements at all
s1 = pd.Series(data=[0, 10, 14, 500])
s2 = pd.Series(data=[0, 10, 20, 500])         # differs from s1 only at index 2
u = pd.Series(data=[1] * 4)                   # four ones
v = pd.Series(data=[0.5] * 4)                 # four halves

####### Some unit tests #######
def test_euclidean_distance():
    """Check `euclidean_distance` on the module-level series fixtures."""
    # A series is at distance zero from itself.
    assert euclidean_distance(s1, s1) == 0

    # Swapping the arguments gives the same value; s1 and s2 differ only
    # in one entry (14 vs 20).
    forward = euclidean_distance(s1, s2)
    backward = euclidean_distance(s2, s1)
    assert forward == backward
    assert backward == 6

    # Two empty series.
    assert euclidean_distance(empty, empty) == 0

    # Four entries, each differing by 0.5.
    assert euclidean_distance(u, v) == 1

def test_lateness_distance():
    """Check `lateness_distance` on the module-level series fixtures."""
    # A series compared with itself yields zero.
    assert lateness_distance(s1, s1) == 0

    # Swapping the arguments flips the sign of the result.
    s1_vs_s2 = lateness_distance(s1, s2)
    negated_s2_vs_s1 = -lateness_distance(s2, s1)
    assert s1_vs_s2 == negated_s2_vs_s1
    assert negated_s2_vs_s1 == 6

    # Two empty series.
    assert lateness_distance(empty, empty) == 0


####### Integration tests #######
# Input CSV files used by the integration tests.
# NOTE(review): the paths are relative, so the tests presumably have to be
# launched from the directory that contains `test/` -- confirm.

# Baseline input that most comparisons below are made against.
three_jobs = "test/input/3jobs.csv"
# Input with session info; test_cleaning expects a UserWarning when it is used.
three_jobs_w_session = "test/input/3jobs_w_sessions.csv"
# NOTE(review): judging by the name, a variant whose time fields are zero --
# verify against the CSV.
three_jobs_zero = "test/input/3jobs_zeros.csv"
# Input with an unsuccessful job; test_cleaning expects a UserWarning for it.
three_jobs_one_unsuccessful = "test/input/3jobs_1unsuccessful.csv"
# Input used in test_cleaning as having no job_ids matching `three_jobs`.
mc_10days_a60 = "test/input/mc_10days_a60_jobs.csv"

def test_cleaning():
    """Check the warnings and errors `distances` emits on awkward inputs."""
    # Input pairs for which a UserWarning is expected.
    warning_cases = (
        (three_jobs_w_session, three_jobs_zero),     # session info is cleaned
        (three_jobs, three_jobs_one_unsuccessful),   # unsuccessful jobs are cleaned
    )
    for first_file, second_file in warning_cases:
        with pytest.warns(UserWarning):
            distances(first_file, second_file)

    # Inputs without matching job_ids must be rejected with a KeyError.
    with pytest.raises(KeyError):
        distances(three_jobs, mc_10days_a60)


def test_some_distances():
    """Check the values returned by `distances` on the three-job inputs.

    Covers, in order:
      * identity: comparing a file with itself gives 0 for every distance
        flavour exercised here;
      * the Euclidean distance on the submission, starting and finish times;
      * the normalized Euclidean distance, including its symmetry under
        swapping the two input files.
    """
    fin, sub, start = ["finish_time"], ["submission_time"], ["starting_time"]

    # d(u,u) == 0 for all distances
    assert distances(three_jobs, three_jobs, field=sub) == 0
    assert distances(three_jobs, three_jobs, field=fin) == 0
    assert distances(three_jobs, three_jobs,
        euclidean=False, norm_eucl=True, field=fin)     == 0
    assert distances(three_jobs, three_jobs,
        euclidean=False, lateness=True, field=fin)      == 0

    # Eucl distance
    # NOTE: pytest.warns only requires that at least one UserWarning is
    # emitted somewhere inside the block, not one per call.
    with pytest.warns(UserWarning):
        assert distances(three_jobs, three_jobs_zero, field=sub)    == 50
        assert distances(three_jobs_zero, three_jobs, field=start)  == 50
        assert distances(three_jobs, three_jobs_zero, field=fin)    == 100

        assert distances(three_jobs, three_jobs_w_session, field=sub)   == 0
        assert distances(three_jobs, three_jobs_w_session, field=start) == 0
        # Irrational expected value: compare with a tolerance instead of
        # relying on bit-exact float equality.
        assert distances(three_jobs, three_jobs_w_session,
            field=fin) == pytest.approx(20 * np.sqrt(2))

    # Normalized eucl distance
    with pytest.warns(UserWarning):
        # No normalized distance is expected against the "zeros" input.
        assert distances(three_jobs, three_jobs_zero,
            euclidean=False, norm_eucl=True, field=sub)           is None
        assert distances(three_jobs, three_jobs_w_session,
            euclidean=False, norm_eucl=True, field=sub)           == 0

        norm_dis_A_B = distances(three_jobs, three_jobs_w_session,
                            euclidean=False, norm_eucl=True, field=fin)
        # Arguments swapped on purpose: this is the B -> A direction, so the
        # equality below really checks symmetry.
        norm_dis_B_A = distances(three_jobs_w_session, three_jobs,
                            euclidean=False, norm_eucl=True, field=fin)
    expected = 20 * np.sqrt(2) / 70     # eucl / sum_runtime
    assert norm_dis_A_B == pytest.approx(norm_dis_B_A)
    # Two-sided check: the value may be neither above nor below `expected`
    # by more than 1e-8.
    assert norm_dis_B_A == pytest.approx(expected, abs=1e-8)