|
4 | 4 | import scipy.spatial.distance as spd
|
5 | 5 |
|
def accuracy(*, pred, gold, method='correlation'):
    """
    Compute prediction accuracy from a matrix of predictions and a matrix of
    gold-standard vectors.

    A prediction counts as "correct" when its corresponding gold-standard
    vector is the one most strongly associated with the predicted vector
    under the chosen distance/similarity measure.

    Parameters
    ----------
    pred : xarray.core.dataarray.DataArray
        A matrix of predictions. It is usually a C-hat or S-hat matrix.
    gold : xarray.core.dataarray.DataArray
        A matrix of gold-standard vectors. It is usually a C or S matrix.
    method : str
        Which method to use to calculate distance/similarity. It must be
        "correlation", "cosine" (for cosine similarity), or "euclidean" (for
        euclidean distance).

    Returns
    -------
    acc : float
        The accuracy of the predictions, namely the ratio of words that are
        predicted correctly to the total number of words.

    Examples
    --------
    >>> import discriminative_lexicon_model as dlm
    >>> import pandas as pd
    >>> words = ['cat','rat','hat']
    >>> sems = pd.DataFrame({'<animate>':[1,1,0], '<object>':[0,0,1], '<predator>':[1,0,0]}, index=words)
    >>> mdl = dlm.ldl.LDL()
    >>> mdl.gen_cmat(words)
    >>> mdl.gen_smat(sems)
    >>> mdl.gen_gmat()
    >>> mdl.gen_chat()
    >>> print(dlm.performance.accuracy(pred=mdl.chat, gold=mdl.cmat, method='correlation'))
    1.0
    """
    # Delegate the matching to predict_df with n=1.
    # NOTE(review): n=1 presumably keeps only the top candidate per word, and
    # ``Correct`` presumably flags whether that candidate is the gold word --
    # confirm against predict_df.
    results = predict_df(pred=pred, gold=gold, n=1, method=method)
    n_correct = results.Correct.sum()
    n_total = len(results)
    return n_correct / n_total
|
|
0 commit comments