1
1
"""
2
- store paths information found by shuo or hua function
2
+ Store path information found by the learn_paths or build_paths function.
3
3
"""
4
4
struct Result_Path_Info_Struct
5
5
ngrams_ind:: Array
@@ -18,20 +18,20 @@ struct Gold_Path_Info_Struct
18
18
end
19
19
20
20
"""
21
- shuo function takes each timestep individually and calculate Yt_hat respectively,
21
+ The learn_paths function takes each timestep individually and calculates Yt_hat for each one.
22
22
"""
23
- function shuo end
23
+ function learn_paths end
24
24
25
25
"""
26
- hua function is shortcut algorithms that only takes n-grams that closed to the
26
+ The build_paths function is a shortcut algorithm that only takes n-grams that are close to the
27
27
validation data
28
28
"""
29
- function hua end
29
+ function build_paths end
30
30
31
31
"""
32
- shuo (::DataFrame,::DataFrame,::SparseMatrixCSC,::Union{SparseMatrixCSC, Matrix},::Union{SparseMatrixCSC, Matrix},::Matrix,::SparseMatrixCSC,::Dict)
32
+ learn_paths (::DataFrame,::DataFrame,::SparseMatrixCSC,::Union{SparseMatrixCSC, Matrix},::Union{SparseMatrixCSC, Matrix},::Matrix,::SparseMatrixCSC,::Dict)
33
33
34
- shuo function takes each timestep individually and calculate Yt_hat respectively,
34
+ The learn_paths function takes each timestep individually and calculates Yt_hat for each one.
35
35
36
36
...
37
37
# Arguments
@@ -48,7 +48,7 @@ shuo function takes each timestep individually and calculate Yt_hat respectively
48
48
- `tokenized::Bool=false`: whether tokenized
49
49
- `sep_token::Union{Nothing, String, Char}=nothing`: separator token
50
50
- `keep_sep::Bool=false`: whether to keep separators in grams
51
- - `words_column ::Union{String, :Symbol}=:Words`: word column names
51
+ - `target_col::Union{String, Symbol}=:Words`: name of the word column
52
52
- `issparse::Symbol=:auto`: mt matrix output format mode
53
53
- `verbose::Bool=false`: if verbose, more information will be printed out
54
54
@@ -58,7 +58,7 @@ latin_train = CSV.DataFrame!(CSV.File(joinpath("data", "latin_mini.csv")))
58
58
cue_obj_train = JudiLing.make_cue_matrix(
59
59
latin_train,
60
60
grams=3,
61
- words_column =:Word,
61
+ target_col =:Word,
62
62
tokenized=false,
63
63
keep_sep=false
64
64
)
@@ -68,7 +68,7 @@ cue_obj_val = JudiLing.make_cue_matrix(
68
68
latin_val,
69
69
cue_obj_train,
70
70
grams=3,
71
- words_column =:Word,
71
+ target_col =:Word,
72
72
tokenized=false,
73
73
keep_sep=false
74
74
)
@@ -96,7 +96,7 @@ A = cue_obj_train.A
96
96
97
97
max_t = JudiLing.cal_max_timestep(latin_train, latin_val, :Word)
98
98
99
- res_train, gpi_train = JudiLing.shuo (
99
+ res_train, gpi_train = JudiLing.learn_paths (
100
100
latin_train,
101
101
latin_train,
102
102
cue_obj_train.C,
@@ -115,11 +115,11 @@ res_train, gpi_train = JudiLing.shuo(
115
115
tokenized=false,
116
116
sep_token="_",
117
117
keep_sep=false,
118
- words_column =:Word,
118
+ target_col =:Word,
119
119
issparse=:dense,
120
120
verbose=false)
121
121
122
- res_val, gpi_val = JudiLing.shuo (
122
+ res_val, gpi_val = JudiLing.learn_paths (
123
123
latin_train,
124
124
latin_val,
125
125
cue_obj_train.C,
@@ -141,13 +141,13 @@ res_val, gpi_val = JudiLing.shuo(
141
141
tokenized=false,
142
142
sep_token="-",
143
143
keep_sep=false,
144
- words_column =:Word,
144
+ target_col =:Word,
145
145
issparse=:dense,
146
146
verbose=false)
147
147
```
148
148
...
149
149
"""
150
- function shuo (
150
+ function learn_paths (
151
151
data_train:: DataFrame ,
152
152
data_val:: DataFrame ,
153
153
C_train:: SparseMatrixCSC ,
@@ -169,7 +169,7 @@ function shuo(
169
169
tokenized= false :: Bool ,
170
170
sep_token= nothing :: Union{Nothing, String, Char} ,
171
171
keep_sep= false :: Bool ,
172
- words_column = " Words" :: String ,
172
+ target_col = " Words" :: String ,
173
173
issparse= :auto :: Symbol ,
174
174
verbose= false :: Bool
175
175
):: Union {Tuple{Vector{Vector{Result_Path_Info_Struct}}, Vector{Gold_Path_Info_Struct}}, Vector{Vector{Result_Path_Info_Struct}}}
@@ -206,7 +206,7 @@ function shuo(
206
206
i,
207
207
data_train,
208
208
grams= grams,
209
- words_column = words_column ,
209
+ target_col = target_col ,
210
210
tokenized= tokenized,
211
211
sep_token= sep_token,
212
212
keep_sep= keep_sep)
@@ -342,21 +342,21 @@ function shuo(
342
342
end
343
343
344
344
"""
345
- hua (::DataFrame,::SparseMatrixCSC,::Union{SparseMatrixCSC, Matrix},::::Union{SparseMatrixCSC, Matrix},::Matrix,::SparseMatrixCSC,::Dict,::Array)
345
+ build_paths(::DataFrame,::SparseMatrixCSC,::Union{SparseMatrixCSC, Matrix},::Union{SparseMatrixCSC, Matrix},::Matrix,::SparseMatrixCSC,::Dict,::Array)
346
346
347
- hua function is shortcut algorithms that only takes n-grams that closed to the
347
+ The build_paths function is a shortcut algorithm that only takes n-grams that are close to the
348
348
validation data
349
349
350
350
...
351
351
# Arguments
352
- - `rC::Union{Nothing, Matrix}=nothing`: correlation between c and chat , passing to save computing time
352
+ - `rC::Union{Nothing, Matrix}=nothing`: correlation matrix of C and Chat; pass it in to save computing time
353
353
- `max_t::Integer=15`: maximum timestep
354
354
- `max_can::Integer=10`: maximum candidates when output
355
355
- `n_neighbors::Integer=10`: find indices only in top n neighbors
356
356
- `grams::Integer=3`: n-grams
357
357
- `tokenized::Bool=false`: whether tokenized
358
358
- `sep_token::Union{Nothing, String, Char}=nothing`: separator token
359
- - `words_column ::Union{String, :Symbol}=:Words`: word column names
359
+ - `target_col::Union{String, Symbol}=:Words`: name of the word column
360
360
- `verbose::Bool=false`: if verbose, more information will be printed out
361
361
362
362
# Examples
@@ -365,7 +365,7 @@ latin_train = CSV.DataFrame!(CSV.File(joinpath("data", "latin_mini.csv")))
365
365
cue_obj_train = JudiLing.make_cue_matrix(
366
366
latin_train,
367
367
grams=3,
368
- words_column =:Word,
368
+ target_col =:Word,
369
369
tokenized=false,
370
370
keep_sep=false
371
371
)
@@ -375,7 +375,7 @@ cue_obj_val = JudiLing.make_cue_matrix(
375
375
latin_val,
376
376
cue_obj_train,
377
377
grams=3,
378
- words_column =:Word,
378
+ target_col =:Word,
379
379
tokenized=false,
380
380
keep_sep=false
381
381
)
@@ -403,7 +403,7 @@ A = cue_obj_train.A
403
403
404
404
max_t = JudiLing.cal_max_timestep(latin_train, latin_val, :Word)
405
405
406
- JudiLing.hua (
406
+ JudiLing.build_paths (
407
407
latin_train,
408
408
cue_obj_train.C,
409
409
S_train,
@@ -417,7 +417,7 @@ JudiLing.hua(
417
417
verbose=false
418
418
)
419
419
420
- JudiLing.hua (
420
+ JudiLing.build_paths (
421
421
latin_val,
422
422
cue_obj_train.C,
423
423
S_val,
@@ -433,7 +433,7 @@ JudiLing.hua(
433
433
```
434
434
...
435
435
"""
436
- function hua (
436
+ function build_paths (
437
437
data_val:: DataFrame ,
438
438
C_train:: SparseMatrixCSC ,
439
439
S_val:: Union{SparseMatrixCSC, Matrix} ,
@@ -449,7 +449,7 @@ function hua(
449
449
grams= 3 :: Integer ,
450
450
tokenized= false :: Bool ,
451
451
sep_token= nothing :: Union{Nothing, String, Char} ,
452
- words_column = :Words :: Union{String, Symbol} ,
452
+ target_col = :Words :: Union{String, Symbol} ,
453
453
verbose= false :: Bool
454
454
):: Vector{Vector{Result_Path_Info_Struct}}
455
455
# initialize queues for storing paths
0 commit comments