Example: Figures
# Reproducing figures from the paper
# "Normalized mutual information is a biased measure for classification"
# (https://arxiv.org/abs/2307.01282)
import clustering_mi as cmi

# Normalization keyword -> symbol printed for Figures 1 and 2 (kept in order).
TRADITIONAL_SYMBOLS = (("none", "I_0"), ("second", "NMI_0^A"), ("mean", "NMI_0^S"))


def _print_traditional_pair(path_c1, path_c2):
    """Print the "traditional" score of two data files for each normalization.

    For every entry of TRADITIONAL_SYMBOLS the file labelled c_1 is scored and
    printed first, followed by the file labelled c_2.
    """
    for keyword, symbol in TRADITIONAL_SYMBOLS:
        for candidate, path in (("c_1", path_c1), ("c_2", path_c2)):
            score = cmi.normalized_mutual_information(
                path,
                normalization=keyword,
                variation="traditional",  # Traditional mutual information
            )
            print(f"{symbol}({candidate};g) = {score:.3f}")


def _print_score(path, normalization, variation):
    """Score ``path`` with the given settings and print one labelled line."""
    score = cmi.normalized_mutual_information(
        path,
        normalization=normalization,
        variation=variation,
    )
    print(f"Normalization: {normalization}, Variation: {variation}, NMI = {score:.3f}")


# Figure 1

print("Figure 1:")

_print_traditional_pair(
    "data/2307.01282/fig_1_1.txt",
    "data/2307.01282/fig_1_2.txt",
)

print()

# Figure 2

print("Figure 2:")

_print_traditional_pair(
    "data/2307.01282/fig_2_1.txt",
    "data/2307.01282/fig_2_2.txt",
)

print()

# Figure 3

print("Figure 3:")

print("Contingency Table:")
print(cmi._get_contingency_table("data/2307.01282/fig_3.txt"))

# Normalization is the outer loop here, variation the inner one.
for fig3_normalization in ("second", "mean", "first"):
    for fig3_variation in ("traditional", "adjusted", "reduced"):
        _print_score("data/2307.01282/fig_3.txt", fig3_normalization, fig3_variation)

print()

# Figure 4

print("Figure 4:")

dataset_files = {"InfoMap": "fig_4_1.txt", "Modularity (γ = 2)": "fig_4_2.txt"}

for dataset_label, file_name in dataset_files.items():
    print(f"Dataset: {dataset_label}")
    # Unlike Figure 3, variation is the outer loop and normalization the inner.
    for fig4_variation in ("traditional", "adjusted", "reduced"):
        for fig4_normalization in ("second", "mean"):
            _print_score(
                f"data/2307.01282/{file_name}",
                fig4_normalization,
                fig4_variation,
            )
Output:
Figure 1:
I_0(c_1;g) = 72.203
I_0(c_2;g) = 60.669
NMI_0^A(c_1;g) = 0.607
NMI_0^A(c_2;g) = 0.510
NMI_0^S(c_1;g) = 0.622
NMI_0^S(c_2;g) = 0.675
Figure 2:
I_0(c_1;g) = 21.090
I_0(c_2;g) = 19.632
NMI_0^A(c_1;g) = 0.348
NMI_0^A(c_2;g) = 0.324
NMI_0^S(c_1;g) = 0.348
NMI_0^S(c_2;g) = 0.359
Figure 3:
Contingency Table:
[[3 3 3 0 0 0 0 0 0]
[0 0 0 3 3 3 0 0 0]
[0 0 0 0 0 0 3 3 3]]
Normalization: second, Variation: traditional, NMI = 1.000
Normalization: second, Variation: adjusted, NMI = 0.687
Normalization: second, Variation: reduced, NMI = 0.750
Normalization: mean, Variation: traditional, NMI = 0.701
Normalization: mean, Variation: adjusted, NMI = 0.694
Normalization: mean, Variation: reduced, NMI = 0.444
Normalization: first, Variation: traditional, NMI = 0.540
Normalization: first, Variation: adjusted, NMI = 0.702
Normalization: first, Variation: reduced, NMI = 0.239
Figure 4:
Dataset: InfoMap
Normalization: second, Variation: traditional, NMI = 0.674
Normalization: mean, Variation: traditional, NMI = 0.547
Normalization: second, Variation: adjusted, NMI = 0.572
Normalization: mean, Variation: adjusted, NMI = 0.479
Normalization: second, Variation: reduced, NMI = 0.482
Normalization: mean, Variation: reduced, NMI = 0.390
Dataset: Modularity (γ = 2)
Normalization: second, Variation: traditional, NMI = 0.728
Normalization: mean, Variation: traditional, NMI = 0.469
Normalization: second, Variation: adjusted, NMI = 0.588
Normalization: mean, Variation: adjusted, NMI = 0.414
Normalization: second, Variation: reduced, NMI = 0.469
Normalization: mean, Variation: reduced, NMI = 0.294