@@ -3,7 +3,7 @@ store paths information found by learn_paths or build_paths function
3
3
"""
4
4
struct Result_Path_Info_Struct
5
5
ngrams_ind:: Array
6
- num_tolerance:: Integer
6
+ num_tolerance:: Int64
7
7
support:: AbstractFloat
8
8
end
9
9
@@ -38,13 +38,13 @@ learn_paths function takes each timestep individually and calculate Yt_hat respe
38
38
- `gold_ind::Union{Nothing, Vector}=nothing`: for use in gold_path_info mode
39
39
- `Shat_val::Union{Nothing, Matrix}=nothing`: for gold_path_info mode
40
40
- `check_gold_path::Bool=false`: whether to turn on gold_path_info mode
41
- - `max_t::Integer =15`: maximum timestep
42
- - `max_can::Integer =10`: maximum candidates when output
41
+ - `max_t::Int64 =15`: maximum timestep
42
+ - `max_can::Int64 =10`: maximum candidates when output
43
43
- `threshold::AbstractFloat=0.1`: for each timestep, only grams greater than threshold will be selected
44
44
- `is_tolerant::Bool=false`: if in tolerant mode, path allows limited nodes under threshold but greater than tolerance
45
45
- `tolerance::AbstractFloat=(-1000.0)`: in tolerant mode, only nodes greater than tolerance and less than threshold will be selected
46
- - `max_tolerance::Integer =4`: maximum numbers of nodes allowed in a path
47
- - `grams::Integer =3`: n-grams
46
+ - `max_tolerance::Int64 =4`: maximum numbers of nodes allowed in a path
47
+ - `grams::Int64 =3`: n-grams
48
48
- `tokenized::Bool=false`: whether tokenized
49
49
- `sep_token::Union{Nothing, String, Char}=nothing`: separator token
50
50
- `keep_sep::Bool=false`: whether to keep separators in grams
@@ -159,13 +159,13 @@ function learn_paths(
159
159
gold_ind= nothing :: Union{Nothing, Vector} ,
160
160
Shat_val= nothing :: Union{Nothing, Matrix} ,
161
161
check_gold_path= false :: Bool ,
162
- max_t= 15 :: Integer ,
163
- max_can= 10 :: Integer ,
162
+ max_t= 15 :: Int64 ,
163
+ max_can= 10 :: Int64 ,
164
164
threshold= 0.1 :: AbstractFloat ,
165
165
is_tolerant= false :: Bool ,
166
166
tolerance= (- 1000.0 ):: AbstractFloat ,
167
- max_tolerance= 4 :: Integer ,
168
- grams= 3 :: Integer ,
167
+ max_tolerance= 4 :: Int64 ,
168
+ grams= 3 :: Int64 ,
169
169
tokenized= false :: Bool ,
170
170
sep_token= nothing :: Union{Nothing, String, Char} ,
171
171
keep_sep= false :: Bool ,
@@ -176,12 +176,12 @@ function learn_paths(
176
176
177
177
# initialize queues for storing paths
178
178
n_val = size (data_val, 1 )
179
- # working_q = Array{Queue{Array{Integer ,1}},1}(undef, n_val)
180
- working_q = Vector{Queue{Tuple{Vector{Integer }, Integer }}}(undef, n_val)
181
- # res = Array{Array{Array{Integer }},1}(undef, n_val)
182
- res = Vector{Vector{Tuple{Vector{Integer }, Integer }}}(undef, n_val)
179
+ # working_q = Array{Queue{Array{Int64 ,1}},1}(undef, n_val)
180
+ working_q = Vector{Queue{Tuple{Vector{Int64 }, Int64 }}}(undef, n_val)
181
+ # res = Array{Array{Array{Int64 }},1}(undef, n_val)
182
+ res = Vector{Vector{Tuple{Vector{Int64 }, Int64 }}}(undef, n_val)
183
183
for j in 1 : n_val
184
- res[j] = Tuple{Vector{Integer }, Integer }[]
184
+ res[j] = Tuple{Vector{Int64 }, Int64 }[]
185
185
end
186
186
187
187
# # initialize gold_path_info supports
@@ -245,7 +245,7 @@ function learn_paths(
245
245
246
246
# for timestep 2 and after 2
247
247
if isassigned (working_q, j)
248
- tmp_working_q = Queue {Tuple{Vector{Integer},Integer }} ()
248
+ tmp_working_q = Queue {Tuple{Vector{Int64},Int64 }} ()
249
249
while ! isempty (working_q[j])
250
250
a = dequeue! (working_q[j]) # # a = [11] Al[11,5] == 1 # candidates = [1, 5, 7]
251
251
@@ -287,11 +287,11 @@ function learn_paths(
287
287
working_q[j] = tmp_working_q
288
288
# for timestep 1
289
289
else
290
- working_q[j] = Queue {Tuple{Vector{Integer},Integer }} ()
290
+ working_q[j] = Queue {Tuple{Vector{Int64},Int64 }} ()
291
291
for c in candidates_t
292
292
# check whether a n-gram is a start n-gram
293
293
if isstart (c, i2f, tokenized= tokenized, sep_token= sep_token)
294
- a = Integer []
294
+ a = Int64 []
295
295
push! (a, c)
296
296
# check whether this n-gram is both start and complete
297
297
if iscomplete (a, i2f, tokenized= tokenized, sep_token= sep_token)
@@ -306,7 +306,7 @@ function learn_paths(
306
306
for c in candidates_t_tlr
307
307
# check whether a n-gram is a start n-gram
308
308
if isstart (c, i2f, tokenized= tokenized, sep_token= sep_token)
309
- a = Integer []
309
+ a = Int64 []
310
310
push! (a, c)
311
311
# check whether this n-gram is both start and complete
312
312
if iscomplete (a, i2f, tokenized= tokenized, sep_token= sep_token)
@@ -350,10 +350,10 @@ validation data
350
350
...
351
351
# Arguments
352
352
- `rC::Union{Nothing, Matrix}=nothing`: correlation Matrix of C and Chat, passing it to save computing time
353
- - `max_t::Integer =15`: maximum timestep
354
- - `max_can::Integer =10`: maximum candidates when output
355
- - `n_neighbors::Integer =10`: find indices only in top n neighbors
356
- - `grams::Integer =3`: n-grams
353
+ - `max_t::Int64 =15`: maximum timestep
354
+ - `max_can::Int64 =10`: maximum candidates when output
355
+ - `n_neighbors::Int64 =10`: find indices only in top n neighbors
356
+ - `grams::Int64 =3`: n-grams
357
357
- `tokenized::Bool=false`: whether tokenized
358
358
- `sep_token::Union{Nothing, String, Char}=nothing`: separator token
359
359
- `target_col::Union{String, Symbol}=:Words`: word column name
@@ -443,23 +443,23 @@ function build_paths(
443
443
i2f:: Dict ,
444
444
C_train_ind:: Array ;
445
445
rC= nothing :: Union{Nothing, Matrix} ,
446
- max_t= 15 :: Integer ,
447
- max_can= 10 :: Integer ,
448
- n_neighbors= 10 :: Integer ,
449
- grams= 3 :: Integer ,
446
+ max_t= 15 :: Int64 ,
447
+ max_can= 10 :: Int64 ,
448
+ n_neighbors= 10 :: Int64 ,
449
+ grams= 3 :: Int64 ,
450
450
tokenized= false :: Bool ,
451
451
sep_token= nothing :: Union{Nothing, String, Char} ,
452
452
target_col= :Words :: Union{String, Symbol} ,
453
453
verbose= false :: Bool
454
454
):: Vector{Vector{Result_Path_Info_Struct}}
455
455
# initialize queues for storing paths
456
456
n_val = size (data_val, 1 )
457
- # working_q = Array{Queue{Array{Integer ,1}},1}(undef, n_val)
458
- # res = Array{Array{Array{Integer }},1}(undef, n_val)
459
- res = Vector{Vector{Tuple{Vector{Integer }, Integer }}}(undef, n_val)
457
+ # working_q = Array{Queue{Array{Int64 ,1}},1}(undef, n_val)
458
+ # res = Array{Array{Array{Int64 }},1}(undef, n_val)
459
+ res = Vector{Vector{Tuple{Vector{Int64 }, Int64 }}}(undef, n_val)
460
460
461
461
for j in 1 : n_val
462
- res[j] = Tuple{Vector{Integer }, Integer }[]
462
+ res[j] = Tuple{Vector{Int64 }, Int64 }[]
463
463
end
464
464
465
465
verbose && println (" Finding all top features.." )
@@ -483,11 +483,11 @@ function build_paths(
483
483
candidates_t = top_indices[j]
484
484
485
485
# timestep 1
486
- working_q = Queue {Array{Integer , 1}} ()
486
+ working_q = Queue {Array{Int64 , 1}} ()
487
487
for c in candidates_t
488
488
# check whether a n-gram is a start n-gram
489
489
if isstart (c, i2f, tokenized= tokenized, sep_token= sep_token)
490
- a = Integer []
490
+ a = Int64 []
491
491
push! (a, c)
492
492
# check whether this n-gram is both start and complete
493
493
if iscomplete (a, i2f, tokenized= tokenized, sep_token= sep_token)
@@ -499,7 +499,7 @@ function build_paths(
499
499
end
500
500
501
501
for i in 2 : max_t
502
- tmp_working_q = Queue {Array{Integer , 1}} ()
502
+ tmp_working_q = Queue {Array{Int64 , 1}} ()
503
503
while ! isempty (working_q)
504
504
a = dequeue! (working_q) # # a = [11] Al[11,5] == 1 # candidates = [1, 5, 7]
505
505
for c in candidates_t # # c = 5 # a = [11, 1, 5, 7] # a = [11, 1] [11, 5] [11, 7]
@@ -528,17 +528,17 @@ function build_paths(
528
528
end
529
529
530
530
"""
531
- eval_can(::Vector{Vector{Tuple{Vector{Integer }, Integer }}},::Union{SparseMatrixCSC, Matrix},::Union{SparseMatrixCSC, Matrix},::Dict,::Integer ,::Bool)
531
+ eval_can(::Vector{Vector{Tuple{Vector{Int64 }, Int64 }}},::Union{SparseMatrixCSC, Matrix},::Union{SparseMatrixCSC, Matrix},::Dict,::Int64 ,::Bool)
532
532
533
533
at the end of the path-finding algorithms, each candidate needs to be evaluated
534
534
regarding their predicted semantic vectors
535
535
"""
536
536
function eval_can (
537
- candidates:: Vector {Vector{Tuple{Vector{Integer }, Integer }}},
537
+ candidates:: Vector {Vector{Tuple{Vector{Int64 }, Int64 }}},
538
538
S:: Union{SparseMatrixCSC, Matrix} ,
539
539
F:: Union{SparseMatrixCSC, Matrix} ,
540
540
i2f:: Dict ,
541
- max_can:: Integer ,
541
+ max_can:: Int64 ,
542
542
verbose= false :: Bool
543
543
):: Array{Array{Result_Path_Info_Struct,1},1}
544
544
@@ -551,7 +551,7 @@ function eval_can(
551
551
res = Result_Path_Info_Struct[]
552
552
if size (candidates[i], 1 ) > 0
553
553
for (ci,n) in candidates[i] # ci = [1,3,4]
554
- Chat = zeros (Integer , length (i2f))
554
+ Chat = zeros (Int64 , length (i2f))
555
555
Chat[ci] .= 1
556
556
Shat = Chat' * F
557
557
Scor = cor (Shat[1 , :], S[i, :])
@@ -575,9 +575,9 @@ function find_top_feature_indices(
575
575
# Chat_val::Union{SparseMatrixCSC, Matrix},
576
576
rC:: Matrix ,
577
577
C_train_ind:: Array ;
578
- n_neighbors= 10 :: Integer ,
578
+ n_neighbors= 10 :: Int64 ,
579
579
verbose= false :: Bool
580
- ):: Vector{Vector{Integer }}
580
+ ):: Vector{Vector{Int64 }}
581
581
582
582
# collect num of val data
583
583
n_val = size (rC, 1 )
@@ -587,7 +587,7 @@ function find_top_feature_indices(
587
587
# display(rC)
588
588
589
589
# initialize features list for all candidates
590
- features_all = Vector {Vector{Integer }} (undef, n_val)
590
+ features_all = Vector {Vector{Int64 }} (undef, n_val)
591
591
592
592
# create iter for tqdm
593
593
verbose && println (" finding all n_neighbors features..." )
0 commit comments