Skip to content

Commit bfafd4d

Browse files
committed
Add warnings to eval.jl
Deprecate eval_SC_chucks in favour of eval_SC_chunks (fixing a typo) and add a warning regarding homophones/homographs in accuracy_comprehension
1 parent 4c320e4 commit bfafd4d

File tree

1 file changed

+72
-20
lines changed

1 file changed

+72
-20
lines changed

src/eval.jl

Lines changed: 72 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,10 @@ function eval_SC_loose end
2525
"""
2626
accuracy_comprehension(S, Shat, data)
2727
28-
Evaluate comprehension accuracy.
28+
Evaluate comprehension accuracy for training data.
29+
30+
!!! note
31+
In case of homophones/homographs in the dataset, the correct/incorrect values for base and inflections may be misleading! See below for more information.
2932
3033
# Obligatory Arguments
3134
- `S::Matrix`: the (gold standard) S matrix
@@ -47,16 +50,19 @@ accuracy_comprehension(
4750
base=[:Lexeme],
4851
inflections=[:Person, :Number, :Tense, :Voice, :Mood]
4952
)
50-
51-
accuracy_comprehension(
52-
S_val,
53-
Shat_val,
54-
latin_train,
55-
target_col=:Words,
56-
base=["Lexeme"],
57-
inflections=[:Person, :Number, :Tense, :Voice, :Mood]
58-
)
5953
```
54+
55+
# Note
56+
In case of homophones/homographs in the dataset, the correct/incorrect values for base and inflections may be misleading!
57+
Consider the following example: The wordform "Äpfel" in German can be nominative plural, genitive plural and accusative plural.
58+
Let's assume we have a dataset in which "Äpfel" occurs in all three case/number combinations (i.e. there are homographs).
59+
If all these wordforms have the same semantic vectors (e.g. because they are derived from word2vec or fasttext which typically
60+
have a single vector per unique wordform), the predicted semantic vector of the wordform "Äpfel" will be equally correlated
61+
with all three case/number combinations in the dataset. In such cases, while the algorithm in this function can unambiguously
62+
conclude that the correct surface form "Äpfel" was comprehended, which of the three possible rows is the correct one will be
63+
picked arbitrarily, because `argmax` simply returns the first of several equally maximal elements (see https://docs.julialang.org/en/v1/base/collections/#Base.argmax). It is thus possible
64+
that the algorithm will then use the genitive plural instead of the intended nominative plural as the ground truth, and will
65+
report that "case" was comprehended incorrectly.
6066
"""
6167
function accuracy_comprehension(
6268
S,
@@ -78,10 +84,16 @@ function accuracy_comprehension(
7884
dfr.r_target = corMat[diagind(corMat)]
7985
dfr.correct = [dfr.target[i] == dfr.form[i] for i = 1:size(dfr, 1)]
8086

87+
if length(data[:, target_col]) != length(Set(data[:, target_col]))
88+
@warn "This dataset contains homophones/homographs. Note that some of the results on the correctness of comprehended base/inflections may be misleading. See documentation of this function for more information."
89+
end
90+
8191
if !isnothing(inflections)
8292
all_features = vcat(base, inflections)
83-
else
93+
elseif !isnothing(base)
8494
all_features = base
95+
else
96+
all_features = []
8597
end
8698

8799
for f in all_features
@@ -110,7 +122,11 @@ end
110122
inflections = nothing,
111123
)
112124
113-
Evaluate comprehension accuracy.
125+
Evaluate comprehension accuracy for validation data.
126+
127+
!!! note
128+
In case of homophones/homographs in the dataset, the correct/incorrect values for base and inflections may be misleading! See below for more information.
129+
114130
115131
# Obligatory Arguments
116132
- `S_val::Matrix`: the (gold standard) S matrix of the validation data
@@ -137,6 +153,18 @@ accuracy_comprehension(
137153
inflections=[:Person, :Number, :Tense, :Voice, :Mood]
138154
)
139155
```
156+
157+
# Note
158+
In case of homophones/homographs in the dataset, the correct/incorrect values for base and inflections may be misleading!
159+
Consider the following example: The wordform "Äpfel" in German can be nominative plural, genitive plural and accusative plural.
160+
Let's assume we have a dataset in which "Äpfel" occurs in all three case/number combinations (i.e. there are homographs).
161+
If all these wordforms have the same semantic vectors (e.g. because they are derived from word2vec or fasttext which typically
162+
have a single vector per unique wordform), the predicted semantic vector of the wordform "Äpfel" will be equally correlated
163+
with all three case/number combinations in the dataset. In such cases, while the algorithm in this function can unambiguously
164+
conclude that the correct surface form "Äpfel" was comprehended, which of the three possible rows is the correct one will be
165+
picked arbitrarily, because `argmax` simply returns the first of several equally maximal elements (see https://docs.julialang.org/en/v1/base/collections/#Base.argmax). It is thus possible
166+
that the algorithm will then use the genitive plural instead of the intended nominative plural as the ground truth, and will
167+
report that "case" was comprehended incorrectly.
140168
"""
141169
function accuracy_comprehension(
142170
S_val,
@@ -160,6 +188,10 @@ function accuracy_comprehension(
160188

161189
append!(data_combined, data_train, promote=true)
162190

191+
if length(data_combined[:, target_col]) != length(Set(data_combined[:, target_col]))
192+
@warn "This dataset contains homophones/homographs. Note that some of the results on the correctness of comprehended base/inflections may be misleading. See documentation of this function for more information."
193+
end
194+
163195
corMat = cor(Shat_val, S, dims = 2)
164196
top_index = [i[2] for i in argmax(corMat, dims = 2)]
165197

@@ -435,7 +467,7 @@ function eval_SC(
435467

436468
# for first parts
437469
for j = 1:num_chucks-1
438-
correct += eval_SC_chucks(
470+
correct += eval_SC_chunks(
439471
SChat_d,
440472
SC_d,
441473
(j - 1) * batch_size + 1,
@@ -445,7 +477,7 @@ function eval_SC(
445477
verbose && ProgressMeter.next!(pb)
446478
end
447479
# for last part
448-
correct += eval_SC_chucks(
480+
correct += eval_SC_chunks(
449481
SChat_d,
450482
SC_d,
451483
(num_chucks - 1) * batch_size + 1,
@@ -504,7 +536,7 @@ function eval_SC(
504536

505537
# for first parts
506538
for j = 1:num_chucks-1
507-
correct += eval_SC_chucks(
539+
correct += eval_SC_chunks(
508540
SChat_d,
509541
SC_d,
510542
(j - 1) * batch_size + 1,
@@ -516,7 +548,7 @@ function eval_SC(
516548
verbose && ProgressMeter.next!(pb)
517549
end
518550
# for last part
519-
correct += eval_SC_chucks(
551+
correct += eval_SC_chunks(
520552
SChat_d,
521553
SC_d,
522554
(num_chucks - 1) * batch_size + 1,
@@ -529,13 +561,18 @@ function eval_SC(
529561
round(correct / l, digits=digits)
530562
end
531563

532-
function eval_SC_chucks(SChat, SC, s, e, batch_size)
564+
function eval_SC_chunks(SChat, SC, s, e, batch_size)
533565
rSC = cor(SChat[s:e, :], SC, dims = 2)
534566
v = [(rSC[i[1], i[1]+s-1] == rSC[i]) ? 1 : 0 for i in argmax(rSC, dims = 2)]
535567
sum(v)
536568
end
537569

538-
function eval_SC_chucks(SChat, SC, s, e, batch_size, data, target_col)
570+
function eval_SC_chucks(SChat, SC, s, e, batch_size)
571+
@warn "eval_SC_chucks is deprecated and will be removed in version 0.10 in favour of eval_SC_chunks"
572+
eval_SC_chunks(SChat, SC, s, e, batch_size)
573+
end
574+
575+
function eval_SC_chunks(SChat, SC, s, e, batch_size, data, target_col)
539576
rSC = cor(SChat[s:e, :], SC, dims = 2)
540577
v = [
541578
data[i[1]+s-1, target_col] == data[i[2], target_col] ? 1 : 0
@@ -544,13 +581,23 @@ function eval_SC_chucks(SChat, SC, s, e, batch_size, data, target_col)
544581
sum(v)
545582
end
546583

547-
function eval_SC_chucks(SChat, SC, s, batch_size)
584+
function eval_SC_chucks(SChat, SC, s, e, batch_size, data, target_col)
585+
@warn "eval_SC_chucks is deprecated and will be removed in version 0.10 in favour of eval_SC_chunks"
586+
eval_SC_chunks(SChat, SC, s, e, batch_size, data, target_col)
587+
end
588+
589+
function eval_SC_chunks(SChat, SC, s, batch_size)
548590
rSC = cor(SChat[s:end, :], SC, dims = 2)
549591
v = [(rSC[i[1], i[1]+s-1] == rSC[i]) ? 1 : 0 for i in argmax(rSC, dims = 2)]
550592
sum(v)
551593
end
552594

553-
function eval_SC_chucks(SChat, SC, s, batch_size, data, target_col)
595+
function eval_SC_chucks(SChat, SC, s, batch_size)
596+
@warn "eval_SC_chucks is deprecated and will be removed in version 0.10 in favour of eval_SC_chunks"
597+
eval_SC_chunks(SChat, SC, s, batch_size)
598+
end
599+
600+
function eval_SC_chunks(SChat, SC, s, batch_size, data, target_col)
554601
rSC = cor(SChat[s:end, :], SC, dims = 2)
555602
v = [
556603
data[i[1]+s-1, target_col] == data[i[2], target_col] ? 1 : 0
@@ -559,6 +606,11 @@ function eval_SC_chucks(SChat, SC, s, batch_size, data, target_col)
559606
sum(v)
560607
end
561608

609+
function eval_SC_chucks(SChat, SC, s, batch_size, data, target_col)
610+
@warn "eval_SC_chucks is deprecated and will be removed in version 0.10 in favour of eval_SC_chunks"
611+
eval_SC_chunks(SChat, SC, s, batch_size, data, target_col)
612+
end
613+
562614
"""
563615
eval_SC_loose(SChat, SC, k)
564616

0 commit comments

Comments
 (0)