Example: Figures

 1# Reproducing figures from the paper
 2# "Normalized mutual information is a biased measure for classification"
 3# (https://arxiv.org/abs/2307.01282)
 4import clustering_mi as cmi
 5
 6# Figure 1
 7
 8print("Figure 1:")
 9
10normalization_names = {"none": "I_0", "second": "NMI_0^A", "mean": "NMI_0^S"}
11
12for normalization, name in normalization_names.items():
13    NMI = cmi.normalized_mutual_information(
14        "data/2307.01282/fig_1_1.txt",
15        normalization=normalization,
16        variation="traditional",
17    )  # Traditional mutual information
18    print(f"{name}(c_1;g) = {NMI:.3f}")
19    NMI = cmi.normalized_mutual_information(
20        "data/2307.01282/fig_1_2.txt",
21        normalization=normalization,
22        variation="traditional",
23    )  # Traditional mutual information
24    print(f"{name}(c_2;g) = {NMI:.3f}")
25
26print()
27
# Figure 2
# Prints the traditional mutual information for the two Figure 2 data files
# under each normalization; the scores are labelled (c_1;g) and (c_2;g).

print("Figure 2:")

# Normalization option -> symbol used when printing the corresponding score.
normalization_names = {
    "none": "I_0",
    "second": "NMI_0^A",
    "mean": "NMI_0^S",
}

for normalization in normalization_names:
    name = normalization_names[normalization]
    # Keyword arguments shared by both calls in this iteration.
    shared_options = {"normalization": normalization, "variation": "traditional"}

    NMI = cmi.normalized_mutual_information(
        "data/2307.01282/fig_2_1.txt", **shared_options
    )  # Traditional mutual information
    print(f"{name}(c_1;g) = {NMI:.3f}")

    NMI = cmi.normalized_mutual_information(
        "data/2307.01282/fig_2_2.txt", **shared_options
    )  # Traditional mutual information
    print(f"{name}(c_2;g) = {NMI:.3f}")

print()
49
# Figure 3
# Prints the contingency table of the Figure 3 data file, then one score for
# every (normalization, variation) combination.

print("Figure 3:")

fig_3_path = "data/2307.01282/fig_3.txt"

print("Contingency Table:")
contingency_table = cmi._get_contingency_table(fig_3_path)
print(contingency_table)

normalizations = ["second", "mean", "first"]
variations = ["traditional", "adjusted", "reduced"]

# Normalization is the outer axis and variation the inner one, so the rows
# are grouped by normalization.
combinations = ((n, v) for n in normalizations for v in variations)

for normalization, variation in combinations:
    NMI = cmi.normalized_mutual_information(
        fig_3_path,
        normalization=normalization,
        variation=variation,
    )
    print(
        f"Normalization: {normalization}, Variation: {variation}, NMI = {NMI:.3f}"
    )

print()
72
# Figure 4
# For each dataset, prints one score for every (variation, normalization)
# combination.

print("Figure 4:")

# Display name of each dataset -> its file name inside data/2307.01282/.
dataset_names_dict = {
    "InfoMap": "fig_4_1.txt",
    "Modularity (γ = 2)": "fig_4_2.txt",
}

variations = ["traditional", "adjusted", "reduced"]
normalizations = ["second", "mean"]

for dataset_name in dataset_names_dict:
    dataset_file = dataset_names_dict[dataset_name]
    print(f"Dataset: {dataset_name}")

    # The path is the same for every combination of this dataset.
    dataset_path = f"data/2307.01282/{dataset_file}"

    # Variation is the outer axis and normalization the inner one, so the
    # rows are grouped by variation.
    combinations = ((v, n) for v in variations for n in normalizations)

    for variation, normalization in combinations:
        NMI = cmi.normalized_mutual_information(
            dataset_path,
            normalization=normalization,
            variation=variation,
        )
        print(
            f"Normalization: {normalization}, Variation: {variation}, NMI = {NMI:.3f}"
        )

Output:

Figure 1:
I_0(c_1;g) = 72.203
I_0(c_2;g) = 60.669
NMI_0^A(c_1;g) = 0.607
NMI_0^A(c_2;g) = 0.510
NMI_0^S(c_1;g) = 0.622
NMI_0^S(c_2;g) = 0.675

Figure 2:
I_0(c_1;g) = 21.090
I_0(c_2;g) = 19.632
NMI_0^A(c_1;g) = 0.348
NMI_0^A(c_2;g) = 0.324
NMI_0^S(c_1;g) = 0.348
NMI_0^S(c_2;g) = 0.359

Figure 3:
Contingency Table:
[[3 3 3 0 0 0 0 0 0]
 [0 0 0 3 3 3 0 0 0]
 [0 0 0 0 0 0 3 3 3]]
Normalization: second, Variation: traditional, NMI = 1.000
Normalization: second, Variation: adjusted, NMI = 0.687
Normalization: second, Variation: reduced, NMI = 0.750
Normalization: mean, Variation: traditional, NMI = 0.701
Normalization: mean, Variation: adjusted, NMI = 0.694
Normalization: mean, Variation: reduced, NMI = 0.444
Normalization: first, Variation: traditional, NMI = 0.540
Normalization: first, Variation: adjusted, NMI = 0.702
Normalization: first, Variation: reduced, NMI = 0.239

Figure 4:
Dataset: InfoMap
Normalization: second, Variation: traditional, NMI = 0.674
Normalization: mean, Variation: traditional, NMI = 0.547
Normalization: second, Variation: adjusted, NMI = 0.572
Normalization: mean, Variation: adjusted, NMI = 0.479
Normalization: second, Variation: reduced, NMI = 0.482
Normalization: mean, Variation: reduced, NMI = 0.390
Dataset: Modularity (γ = 2)
Normalization: second, Variation: traditional, NMI = 0.728
Normalization: mean, Variation: traditional, NMI = 0.469
Normalization: second, Variation: adjusted, NMI = 0.588
Normalization: mean, Variation: adjusted, NMI = 0.414
Normalization: second, Variation: reduced, NMI = 0.469
Normalization: mean, Variation: reduced, NMI = 0.294