Module name changes in documentation and adds dlm.ldl.LDL.accuracy (#8)

msaito8623 · web-flow · commit 7e0c01566df5 · 2025-01-09T14:05:18.000+01:00
* Fixes tests

* Adds the 'count' argument to dlm.ldl.LDL.gen_cmat.

* Adds the dependency on netcdf4 in pyproject.toml

* Adds a new argument 'mats' to dlm.ldl.LDL.save_matrices for saving matrices selectively

* Fixes the generation of C-hat and S-hat in docs (quickstart.rst)

* Fixes pyldl to discriminative_lexicon_model in docs

* Adds a docstring to dlm.performance.accuracy

* Adds dlm.ldl.LDL.accuracy

* Updates .gitignore to exclude notes/
diff --git a/.gitignore b/.gitignore
@@ -3,3 +3,4 @@ discriminative_lexicon_model.egg-info/
 *.swp
 build/
 dist/
+notes/
diff --git a/discriminative_lexicon_model/ldl.py b/discriminative_lexicon_model/ldl.py
@@ -2,6 +2,7 @@
 import numpy as np
 
 from . import mapping as lm
+from . import performance as lp
 
 class LDL:
     def __init__ (self, words=None, embed_or_df=None, cmat=False, smat=False,
@@ -136,6 +137,34 @@ def load_matrices (self, directory, add=''):
             setattr(self, i, mat)
         return None
 
+    def accuracy (self, method='correlation', print_output=True):
+        acc_comp = acc_prod = None
+        exist_chat = hasattr(self, 'chat')
+        exist_shat = hasattr(self, 'shat')
+        if exist_chat:
+            acc_prod = lp.accuracy(pred=self.chat, gold=self.cmat, method=method)
+        if exist_shat:
+            acc_comp = lp.accuracy(pred=self.shat, gold=self.smat, method=method)
+        if (acc_comp is None) and (acc_prod is None):
+            raise ValueError('No C-hat or S-hat was found.')
+        if print_output:
+            if (acc_comp is None) and (not acc_prod is None):
+                acc_prod = 'Production: {}'.format(acc_prod)
+                acc = acc_prod
+            elif (not acc_comp is None) and (acc_prod is None):
+                acc_comp = 'Comprehension: {}'.format(acc_comp)
+                acc = acc_comp
+            else:
+                acc_prod = 'Production: {}'.format(acc_prod)
+                acc_comp = 'Comprehension: {}'.format(acc_comp)
+                acc = acc_comp + '\n' + acc_prod
+            print(acc)
+            acc = None
+        else:
+            acc = {'Comprehension': acc_comp, 'Production': acc_prod}
+            acc = { i:j for i,j in acc.items() if not j is None }
+        return acc
+
 def concat_cues (a):
     assert is_consecutive(a)
     a = pd.Series(a).str.slice(start=0, stop=1).iloc[:-1].str.cat(sep='') + pd.Series(a).iloc[-1]
diff --git a/discriminative_lexicon_model/performance.py b/discriminative_lexicon_model/performance.py
@@ -4,6 +4,44 @@
 import scipy.spatial.distance as spd
 
 def accuracy (*, pred, gold, method='correlation'):
+    """
+    Calculates prediction accuracy from a matrix of predictions and that of
+    gold-standard vectors. The prediction is considered as "correct" when its
+    corresponding gold-standard vector is the most strongly correlated with the
+    predicted vector.
+
+    Parameters
+    ----------
+    pred : xarray.core.dataarray.DataArray
+        A matrix of predictions. It is usually a C-hat or S-hat matrix.
+    gold : xarray.core.dataarray.DataArray
+        A matrix of gold-standard vectors. It is usually a C or S matrix.
+    method : str
+        Which method to use to calculate distance/similarity. It must be
+        "correlation", "cosine" (for cosine similarity), or "euclidean" (for
+        euclidean distance).
+
+    Returns
+    -------
+    n : float
+        The accuracy of the predictions, namely the ratio of words that are
+        predicted correctly to the total number of the words.
+
+    Examples
+    --------
+    >>> import discriminative_lexicon_model as dlm
+    >>> import pandas as pd
+    >>> words = ['cat','rat','hat']
+    >>> sems = pd.DataFrame({'<animate>':[1,1,0], '<object>':[0,0,1], '<predator>':[1,0,0]}, index=words)
+    >>> mdl = dlm.ldl.LDL()
+    >>> mdl.gen_cmat(words)
+    >>> mdl.gen_smat(sems)
+    >>> mdl.gen_gmat()
+    >>> mdl.gen_chat()
+    >>> print(dlm.performance.accuracy(pred=mdl.chat, gold=mdl.cmat, method='correlation'))
+    1.0
+    """
+
     pred = predict_df(pred=pred, gold=gold, n=1, method=method)
     acc = pred.Correct.sum() / len(pred)
     return acc

-Original file line number
+Diff line change
 *.swp
 build/
 dist/
 +notes/