Merge pull request #127 from quantling/early_stopping_bugfix

MariaHei · web-flow · commit 1eedd7938806 · 2024-11-03T21:19:56.000Z
Early stopping bugfix
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -79,7 +79,7 @@ jobs:
       - uses: actions/checkout@v2
       - uses: julia-actions/setup-julia@v1
         with:
-          version: '1'
+          version: '1.10'
       - run: |
           julia --project=docs -e '
             using Pkg
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "JudiLing"
 uuid = "b43a184b-0e9d-488b-813a-80fd5dbc9fd8"
 authors = ["Xuefeng Luo", "Maria Heitmeier"]
-version = "0.11.1"
+version = "0.12.0"
 
 [deps]
 BSON = "fbb218c0-5317-5bc6-957e-2ee96dd4b1f0"
diff --git a/README.md b/README.md
@@ -10,6 +10,9 @@ JudiLing: An implementation for Linear Discriminative Learning in Julia
 Maintainer: Maria Heitmeier [@MariaHei](https://github.com/MariaHei)\
 Original codebase: Xuefeng Luo [@MegamindHenry](https://github.com/MegamindHenry)
 
+**Note:**
+JudiLing versions prior to 0.12 had a bug in the early stopping mechanism: training stopped `early_stopping` epochs after the beginning of training, rather than `early_stopping` epochs after the **best** epoch (in terms of loss or accuracy). Please use version 0.12 or later to get accurate results with `early_stopping`.
+
 ## Installation
 
 ```
diff --git a/src/deep_learning.jl b/src/deep_learning.jl
@@ -115,19 +115,9 @@ function get_and_train_model(X_train::Union{SparseMatrixCSC,Matrix},
     # set up early stopping and saving of best models
     min_loss = typemax(Float64)
     max_acc = -1
-
-    function id_func(x)
-        return (x)
-    end
-
-    if !ismissing(early_stopping)
-        if optimise_for_acc
-            init_score = max_acc
-        else
-            init_score = min_loss
-        end
-        es = Flux.early_stopping(id_func, early_stopping, init_score=init_score)
-    end
+    min_loss_es = typemax(Float64)
+    max_acc_es = -1
+    early_stopping_lag = 1
 
     # Set up the model if not provided
     verbose && println("Setting up model...")
@@ -273,11 +263,27 @@ function get_and_train_model(X_train::Union{SparseMatrixCSC,Matrix},
              end
 
              # early stopping
-             if optimise_for_acc
-                 !ismissing(early_stopping) && es(-acc) && break
-             else
-                 !ismissing(early_stopping) && es(mean_val_loss) && break
-             end
+             if !ismissing(early_stopping)
+                 if optimise_for_acc
+                    if acc > max_acc_es
+                         max_acc_es = acc
+                         early_stopping_lag = 1
+                    elseif early_stopping_lag >= early_stopping
+                        break
+                    else
+                         early_stopping_lag += 1
+                    end
+                else
+                     if mean_val_loss < min_loss_es
+                          min_loss_es = mean_val_loss
+                          early_stopping_lag = 1
+                     elseif early_stopping_lag >= early_stopping
+                         break
+                     else
+                          early_stopping_lag += 1
+                     end
+                 end
+            end
         else
 
             if !ismissing(measures_func)
diff --git a/test/deep_learning_tests.jl b/test/deep_learning_tests.jl
@@ -274,6 +274,7 @@ end
 
     @test JudiLing.eval_SC(Shat_train, S_train) ≈ 1.0
     @test Flux.mse(Shat_val', S_val') ≈ findmin(losses_val)[1]
+    @test findmin(losses_val)[2] + 20 == length(losses_val)
 
     res = JudiLing.get_and_train_model(cue_obj_train.C,
                                 S_train,
@@ -294,6 +295,37 @@ end
 
     @test JudiLing.eval_SC(Shat_train, S_train) ≈ 1.0
     @test JudiLing.eval_SC(Shat_val, S_val, S_train, val_es, train_es, :Word) ≈ findmax(accs_val)[1]
+    @test findmax(accs_val)[2] + 20 == length(accs_val)
+
+    res = JudiLing.get_and_train_model(cue_obj_train.C,
+                                S_train,
+                                cue_obj_val.C,
+                                S_val,
+                                train_es, val_es,
+                                :Word,
+                                "test.bson",
+                                return_losses=true,
+                                early_stopping=10,
+                                optimise_for_acc = true,
+                                batchsize=2)
+
+    model, losses_train, losses_val, accs_val = res.model, res.losses_train, res.losses_val, res.accs_val
+    @test findmax(accs_val)[2] + 10 == length(accs_val)
+
+    res = JudiLing.get_and_train_model(cue_obj_train.C,
+                                S_train,
+                                cue_obj_val.C,
+                                S_val,
+                                train_es, val_es,
+                                :Word,
+                                "test.bson",
+                                return_losses=true,
+                                early_stopping=10,
+                                n_epochs=1000,
+                                batchsize=2)
+
+    model, losses_train, losses_val, accs_val = res.model, res.losses_train, res.losses_val, res.accs_val
+    @test findmin(losses_val)[2] + 10 == length(losses_val)
 
 end