From 86da7344698f1b32f522cca2cc21e9121da8ee4a Mon Sep 17 00:00:00 2001
From: Ian Butterworth
Date: Mon, 19 Nov 2018 18:35:28 -0500
Subject: [PATCH 01/24] is_apple depreciated

---
 deps/build.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deps/build.jl b/deps/build.jl
index 17af91f..a8fda36 100644
--- a/deps/build.jl
+++ b/deps/build.jl
@@ -5,7 +5,7 @@ flags = ["-fPIC", "-Wall", "-O3", "-shared"]
 libname = "libmochaext.so"
 openmp = "-fopenmp"

-@static is_apple() ? begin
+@static isapple() ? begin
 if !haskey(ENV, "MOCHA_FORCE_OMP")
 println("OpenMP is currently not officially supported by OS X Clang compiler yet.")
 println("(see http://clang-omp.github.io/ to install OpenMP clang extension, or")

From 603b47344730797e08402d7b364d4782021790e2 Mon Sep 17 00:00:00 2001
From: Ian Butterworth
Date: Mon, 19 Nov 2018 18:55:16 -0500
Subject: [PATCH 02/24] .

---
 deps/build.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deps/build.jl b/deps/build.jl
index a8fda36..057384c 100644
--- a/deps/build.jl
+++ b/deps/build.jl
@@ -5,7 +5,7 @@ flags = ["-fPIC", "-Wall", "-O3", "-shared"]
 libname = "libmochaext.so"
 openmp = "-fopenmp"

-@static isapple() ? begin
+@static Sys.isapple() ? begin
 if !haskey(ENV, "MOCHA_FORCE_OMP")
 println("OpenMP is currently not officially supported by OS X Clang compiler yet.")
 println("(see http://clang-omp.github.io/ to install OpenMP clang extension, or")

From b419d43dcb21029c2e7773924eeefdbf0a0f358c Mon Sep 17 00:00:00 2001
From: Ian Butterworth
Date: Mon, 19 Nov 2018 20:57:34 -0500
Subject: [PATCH 03/24] attempts to update to 1.0.2

---
 deps/build.log | 9 +++
 .../ijulia/ilsvrc12/imagenet-classifier.ipynb | 57 +++++++++++++++++--
 src/logging.jl | 2 +-
 src/macros.jl | 28 ++++-----
 4 files changed, 75 insertions(+), 21 deletions(-)
 create mode 100644 deps/build.log

diff --git a/deps/build.log b/deps/build.log
new file mode 100644
index 0000000..edc0e08
--- /dev/null
+++ b/deps/build.log
@@ -0,0 +1,9 @@
+OpenMP is currently not officially supported by OS X Clang compiler yet.
+(see http://clang-omp.github.io/ to install OpenMP clang extension, or
+install gcc.
+
+Disabling OpenMP.
To force enable OpenMP, set MOCHA_FORCE_OMP environment +variable and set the CXX environment variable to the appropriate value +to invoke GCC's g++ frontend, such as g++-5 + +Running `g++ -fPIC -Wall -O3 -shared '' -o libmochaext.so im2col.cpp pooling.cpp` diff --git a/examples/ijulia/ilsvrc12/imagenet-classifier.ipynb b/examples/ijulia/ilsvrc12/imagenet-classifier.ipynb index f2e762a..5345197 100644 --- a/examples/ijulia/ilsvrc12/imagenet-classifier.ipynb +++ b/examples/ijulia/ilsvrc12/imagenet-classifier.ipynb @@ -43,9 +43,17 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "┌ Info: Precompiling Mocha [f17d6557-5fdd-57bf-a30c-27e301b4ff87]\n", + "└ @ Base loading.jl:1186\n" + ] + }, { "name": "stdout", "output_type": "stream", @@ -53,8 +61,45 @@ "Configuring Mocha...\n", " * CUDA disabled by default\n", " * Native Ext disabled by default\n", - "Mocha configured, continue loading module...\n", - "DefaultBackend = Mocha.CPUBackend\n" + "Mocha configured, continue loading module...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ERROR: LoadError: LoadError: syntax: \"$\" expression outside quote\n", + "Stacktrace:\n", + " [1] include at ./boot.jl:317 [inlined]\n", + " [2] include_relative(::Module, ::String) at ./loading.jl:1038\n", + " [3] include at ./sysimg.jl:29 [inlined]\n", + " [4] include(::String) at /Users/IanB/Leuko/GitHub/Mocha.jl/src/Mocha.jl:1\n", + " [5] top-level scope at none:0\n", + " [6] include at ./boot.jl:317 [inlined]\n", + " [7] include_relative(::Module, ::String) at ./loading.jl:1038\n", + " [8] include(::Module, ::String) at ./sysimg.jl:29\n", + " [9] top-level scope at none:2\n", + " [10] eval at ./boot.jl:319 [inlined]\n", + " [11] eval(::Expr) at ./client.jl:397\n", + " [12] top-level scope at ./none:3\n", + "in expression starting at /Users/IanB/Leuko/GitHub/Mocha.jl/src/macros.jl:33\n", + "in expression starting at /Users/IanB/Leuko/GitHub/Mocha.jl/src/Mocha.jl:18\n" + ] + }, + { + "ename": "ErrorException", + "evalue": "Failed to precompile Mocha [f17d6557-5fdd-57bf-a30c-27e301b4ff87] to /Users/IanB/.julia/compiled/v1.1/Mocha/Kv2xC.ji.", + "output_type": "error", + "traceback": [ + "Failed to precompile Mocha [f17d6557-5fdd-57bf-a30c-27e301b4ff87] to /Users/IanB/.julia/compiled/v1.1/Mocha/Kv2xC.ji.", + "", + "Stacktrace:", + " [1] error(::String) at ./error.jl:33", + " [2] compilecache(::Base.PkgId, ::String) at ./loading.jl:1197", + " [3] _require(::Base.PkgId) at ./loading.jl:960", + " [4] require(::Base.PkgId) at ./loading.jl:858", + " [5] require(::Module, ::Symbol) at ./loading.jl:853", + " [6] top-level scope at In[3]:1" ] } ], @@ -4131,15 +4176,15 @@ ], "metadata": { "kernelspec": { - "display_name": "Julia 0.6.1", + "display_name": "Julia 1.1.0-DEV", "language": "julia", - "name": "julia-0.6" + "name": "julia-1.1" }, "language_info": { "file_extension": ".jl", "mimetype": "application/julia", "name": "julia", - "version": "0.6.1" + "version": "1.1.0" } }, "nbformat": 4, diff --git a/src/logging.jl b/src/logging.jl index ae51658..6733d94 100644 --- a/src/logging.jl +++ b/src/logging.jl @@ -3,7 +3,7 @@ export m_debug, m_info, m_notice, m_warn, m_error # NOTE: It isn't generally recommended to configure your logger at the package/library level. 
push!(getlogger(Mocha), - DefaultHandler(STDOUT, DefaultFormatter("[{date} | {level} | {name}]: {msg}"))) + DefaultHandler(stdout, DefaultFormatter("[{date} | {level} | {name}]: {msg}"))) setlevel!(getlogger(Mocha), "info") setpropagating!(getlogger(Mocha), false) diff --git a/src/macros.jl b/src/macros.jl index dd81288..b36fa6a 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -56,22 +56,22 @@ macro defstruct(name, super_name, fields) type_body = Expr(:block, field_defs...) # constructor - asserts = map(filter(i -> isassigned(field_asserts,i), 1:length(fields))) do i - :(@assert($(field_asserts[i]))) - end - construct = Expr(:call, name, field_names...) - ctor_body = Expr(:block, asserts..., construct) - ctor_def = Expr(:call, name, Expr(:parameters, field_defaults...)) - ctor = Expr(:(=), ctor_def, ctor_body) + quote + asserts = map(filter(i -> isassigned(field_asserts,i), 1:length(fields))) do i + :(@assert($(field_asserts[i]))) + end + construct = Expr(:call, name, field_names...) + ctor_body = Expr(:block, asserts..., construct) + ctor_def = Expr(:call, name, Expr(:parameters, field_defaults...)) + ctor = Expr(:(=), ctor_def, ctor_body) - # for copy constructor - field_assigns = Expr(:block, [:(params[symbol($(esc(string(fname))))] = proto.$fname) for fname in field_names]...) - field_expose = Expr(:block, [:($(esc(fname)) = params[symbol($(esc(string(fname))))]) for fname in field_names]...) - assert_block = Expr(:block, asserts...) - obj_construct = Expr(:call, name, field_names...) - copy_fname = esc(:copy) + # for copy constructor + field_assigns = Expr(:block, [:(params[symbol($(esc(string(fname))))] = proto.$fname) for fname in field_names]...) + field_expose = Expr(:block, [:($(esc(fname)) = params[symbol($(esc(string(fname))))]) for fname in field_names]...) + assert_block = Expr(:block, asserts...) + obj_construct = Expr(:call, name, field_names...) 
+ copy_fname = esc(:copy) - quote immutable $(name) <: $super_name $type_body end From 074e4a00f01b4fc0307cc1139280f575cab8c2d9 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Mon, 19 Nov 2018 21:40:35 -0500 Subject: [PATCH 04/24] immutable to struct --- src/blob.jl | 2 +- src/data-transformers.jl | 2 +- src/macros.jl | 31 ++++++++++++++++--------------- src/solvers.jl | 2 +- src/solvers/adadelta.jl | 2 +- src/solvers/adagrad.jl | 2 +- src/solvers/adam.jl | 2 +- src/solvers/nesterov.jl | 2 +- src/solvers/sgd.jl | 2 +- 9 files changed, 24 insertions(+), 23 deletions(-) diff --git a/src/blob.jl b/src/blob.jl index 58f9916..26b114b 100644 --- a/src/blob.jl +++ b/src/blob.jl @@ -125,7 +125,7 @@ end ############################################################ # A Blob for CPU Computation ############################################################ -immutable CPUBlob{T <: AbstractFloat, N} <: Blob{T, N} +struct CPUBlob{T <: AbstractFloat, N} <: Blob{T, N} data :: AbstractArray{T, N} end CPUBlob{N}(t :: Type, dims::NTuple{N,Int}) = CPUBlob(Array{t}(dims)) diff --git a/src/data-transformers.jl b/src/data-transformers.jl index cfc62a0..eb45f75 100644 --- a/src/data-transformers.jl +++ b/src/data-transformers.jl @@ -14,7 +14,7 @@ immutable SubMean <: DataTransformerType end SubMean(;mean_file="", mean_blob=NullBlob()) = SubMean(mean_file, mean_blob) -immutable Scale <: DataTransformerType +struct Scale <: DataTransformerType scale :: AbstractFloat end Scale(;scale=1.0) = Scale(scale) diff --git a/src/macros.jl b/src/macros.jl index b36fa6a..7c481fa 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -56,23 +56,24 @@ macro defstruct(name, super_name, fields) type_body = Expr(:block, field_defs...) # constructor - quote - asserts = map(filter(i -> isassigned(field_asserts,i), 1:length(fields))) do i - :(@assert($(field_asserts[i]))) - end - construct = Expr(:call, name, field_names...) - ctor_body = Expr(:block, asserts..., construct) - ctor_def = Expr(:call, name, Expr(:parameters, field_defaults...)) - ctor = Expr(:(=), ctor_def, ctor_body) + + asserts = map(filter(i -> isassigned(field_asserts,i), 1:length(fields))) do i + :(@assert($(field_asserts[i]))) + end + construct = Expr(:call, name, field_names...) + ctor_body = Expr(:block, asserts..., construct) + ctor_def = Expr(:call, name, Expr(:parameters, field_defaults...)) + ctor = Expr(:(=), ctor_def, ctor_body) - # for copy constructor - field_assigns = Expr(:block, [:(params[symbol($(esc(string(fname))))] = proto.$fname) for fname in field_names]...) - field_expose = Expr(:block, [:($(esc(fname)) = params[symbol($(esc(string(fname))))]) for fname in field_names]...) - assert_block = Expr(:block, asserts...) - obj_construct = Expr(:call, name, field_names...) - copy_fname = esc(:copy) + # for copy constructor + field_assigns = Expr(:block, [:(params[symbol($(esc(string(fname))))] = proto.$fname) for fname in field_names]...) + field_expose = Expr(:block, [:($(esc(fname)) = params[symbol($(esc(string(fname))))]) for fname in field_names]...) + assert_block = Expr(:block, asserts...) + obj_construct = Expr(:call, name, field_names...) 
+ copy_fname = esc(:copy) - immutable $(name) <: $super_name + quote + struct $(name) <: $super_name $type_body end diff --git a/src/solvers.jl b/src/solvers.jl index 88632c8..07f4670 100644 --- a/src/solvers.jl +++ b/src/solvers.jl @@ -10,7 +10,7 @@ import Base.Meta: quot @compat abstract type InternalSolverState end # All the state a solver needs to update an iteration const SolverParameters = Dict{Symbol,Any} -immutable Solver{T<:SolverMethod} +struct Solver{T<:SolverMethod} method :: T params :: SolverParameters coffee_lounge :: Any # forward declaration diff --git a/src/solvers/adadelta.jl b/src/solvers/adadelta.jl index c86dc9e..27282ed 100644 --- a/src/solvers/adadelta.jl +++ b/src/solvers/adadelta.jl @@ -5,7 +5,7 @@ export Adadelta -immutable Adadelta <: SolverMethod +struct Adadelta <: SolverMethod end make_solver_parameters(method::Adadelta; kwargs...) = diff --git a/src/solvers/adagrad.jl b/src/solvers/adagrad.jl index e0f5573..209f9e9 100644 --- a/src/solvers/adagrad.jl +++ b/src/solvers/adagrad.jl @@ -7,7 +7,7 @@ export Adagrad -immutable Adagrad <: SolverMethod +struct Adagrad <: SolverMethod end make_solver_parameters(method::Adagrad; kwargs...)= diff --git a/src/solvers/adam.jl b/src/solvers/adam.jl index 4ce4d5c..56c1edd 100644 --- a/src/solvers/adam.jl +++ b/src/solvers/adam.jl @@ -1,6 +1,6 @@ export Adam -immutable Adam <: SolverMethod +struct Adam <: SolverMethod end function make_solver_parameters(solver::Adam; kwargs...) diff --git a/src/solvers/nesterov.jl b/src/solvers/nesterov.jl index 3d7d30f..b1d6d4d 100644 --- a/src/solvers/nesterov.jl +++ b/src/solvers/nesterov.jl @@ -5,7 +5,7 @@ # Optimizing Recurrent Networks. arXiv:1212.0901 [cs.LG] -immutable Nesterov <: SolverMethod +struct Nesterov <: SolverMethod end type NesterovSolverState <: InternalSolverState learning_rate :: Float64 diff --git a/src/solvers/sgd.jl b/src/solvers/sgd.jl index 14cd74d..f4ea7d4 100644 --- a/src/solvers/sgd.jl +++ b/src/solvers/sgd.jl @@ -1,6 +1,6 @@ export SGD -immutable SGD <: SolverMethod +struct SGD <: SolverMethod end const defaultDict = @compat Dict(:lr_policy => LRPolicy.Fixed(0.01), :mom_policy => MomPolicy.Fixed(0.)) From e4848ea44e8b77cb9d858120367d64a3d0ab0d48 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Mon, 19 Nov 2018 21:43:28 -0500 Subject: [PATCH 05/24] 1.0.2 updates --- src/constraints.jl | 4 ++-- src/cuda/blob.jl | 2 +- src/cuda/cublas.jl | 2 +- src/cuda/cuda.jl | 12 ++++++------ src/cuda/cudnn.jl | 2 +- src/data-transformers.jl | 2 +- src/initializers.jl | 10 +++++----- src/regularizers.jl | 6 +++--- 8 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/constraints.jl b/src/constraints.jl index 52a1370..26c3f6a 100644 --- a/src/constraints.jl +++ b/src/constraints.jl @@ -3,13 +3,13 @@ export constrain! 
@compat abstract type Constraint end -immutable NoCons <: Constraint +struct NoCons <: Constraint threshold :: AbstractFloat # not used, just for consistent API every_n_iter :: Int # also not used end NoCons() = NoCons(0.0, 0) -immutable L2Cons <: Constraint +struct L2Cons <: Constraint threshold :: AbstractFloat every_n_iter :: Int end diff --git a/src/cuda/blob.jl b/src/cuda/blob.jl index dcd2219..7c19309 100644 --- a/src/cuda/blob.jl +++ b/src/cuda/blob.jl @@ -3,7 +3,7 @@ using .CUDA export CuBlobDescriptor, CuPODBlobDescriptor, CuTensorBlobDescriptor, CuFilterBlobDescriptor export CuTensorBlob -immutable CuTensorBlob{T<:AbstractFloat,N} <: Blob{T,N} +struct CuTensorBlob{T<:AbstractFloat,N} <: Blob{T,N} ptr :: CuPtr shape :: NTuple{N, Int} len :: Int diff --git a/src/cuda/cublas.jl b/src/cuda/cublas.jl index c041164..479b737 100644 --- a/src/cuda/cublas.jl +++ b/src/cuda/cublas.jl @@ -15,7 +15,7 @@ const CUBLAS_STATUS_INTERNAL_ERROR = 14 const CUBLAS_STATUS_NOT_SUPPORTED = 15 const CUBLAS_STATUS_LICENSE_ERROR = 16 -immutable CuBLASError <: Exception +struct CuBLASError <: Exception code :: Int end using Compat diff --git a/src/cuda/cuda.jl b/src/cuda/cuda.jl index 9169774..a615e83 100644 --- a/src/cuda/cuda.jl +++ b/src/cuda/cuda.jl @@ -73,7 +73,7 @@ const driver_error_descriptions = @compat(Dict( 999 => "Unknown error" )) -immutable CuDriverError <: Exception +struct CuDriverError <: Exception code::Int end @@ -103,7 +103,7 @@ cubox{T}(x::T) = T[x] ############################################################ # Device and Context ############################################################ -immutable CuDevice +struct CuDevice ordinal::Cint handle::Cint @@ -116,7 +116,7 @@ immutable CuDevice end end -immutable CuContext +struct CuContext handle::Ptr{Void} end @@ -167,7 +167,7 @@ end ############################################################ # CUDA streams ############################################################ -immutable CuStream +struct CuStream handle::Ptr{Void} blocking::Bool priority::Int @@ -189,7 +189,7 @@ end ############################################################ # PTX Module and Function ############################################################ -immutable CuModule +struct CuModule handle::Ptr{Void} function CuModule(filename::AbstractString) @@ -204,7 +204,7 @@ function unload(md::CuModule) end -immutable CuFunction +struct CuFunction handle::Ptr{Void} function CuFunction(md::CuModule, name::String) diff --git a/src/cuda/cudnn.jl b/src/cuda/cudnn.jl index 22cafce..600c03e 100644 --- a/src/cuda/cudnn.jl +++ b/src/cuda/cudnn.jl @@ -16,7 +16,7 @@ const CUDNN_STATUS_EXECUTION_FAILED = 8 const CUDNN_STATUS_NOT_SUPPORTED = 9 const CUDNN_STATUS_LICENSE_ERROR = 10 -immutable CuDNNError <: Exception +struct CuDNNError <: Exception code :: Int end using Compat diff --git a/src/data-transformers.jl b/src/data-transformers.jl index eb45f75..facd762 100644 --- a/src/data-transformers.jl +++ b/src/data-transformers.jl @@ -8,7 +8,7 @@ module DataTransformers using ..Mocha using Compat -immutable SubMean <: DataTransformerType +struct SubMean <: DataTransformerType mean_file :: AbstractString mean_blob :: Blob end diff --git a/src/initializers.jl b/src/initializers.jl index 7722903..e05bc00 100644 --- a/src/initializers.jl +++ b/src/initializers.jl @@ -7,12 +7,12 @@ export OrthogonalInitializer @compat abstract type Initializer end # The root type of all initializer -immutable NullInitializer <: Initializer end +struct NullInitializer <: Initializer end function 
init(::NullInitializer, blob::Blob) # do nothing end -immutable ConstantInitializer <: Initializer +struct ConstantInitializer <: Initializer value end @@ -32,7 +32,7 @@ end # For a ND-tensor blob parameter, the product of the 1 ~ (N-1) dimensions # are considered as fan-in, and the last dimension is considered as fan-out. ################################################################################ -immutable XavierInitializer <: Initializer +struct XavierInitializer <: Initializer end function init(initializer::XavierInitializer, blob::Blob) fan_in = get_fea_size(blob) @@ -41,7 +41,7 @@ function init(initializer::XavierInitializer, blob::Blob) copy!(blob, init_val) end -immutable GaussianInitializer <: Initializer +struct GaussianInitializer <: Initializer mean :: AbstractFloat std :: AbstractFloat end @@ -59,7 +59,7 @@ end # ############################################################################# -immutable OrthogonalInitializer <: Initializer +struct OrthogonalInitializer <: Initializer gain::AbstractFloat end OrthogonalInitializer() = OrthogonalInitializer(1.0) # but use OrthogonalInitializer(sqrt(2)) for ReLU units diff --git a/src/regularizers.jl b/src/regularizers.jl index 63e83bf..19928f6 100644 --- a/src/regularizers.jl +++ b/src/regularizers.jl @@ -4,16 +4,16 @@ export forward, backward @compat abstract type Regularizer end -immutable NoRegu <: Regularizer +struct NoRegu <: Regularizer coefficient :: AbstractFloat # not used, just for consistent API end NoRegu() = NoRegu(0.0) -immutable L2Regu <: Regularizer +struct L2Regu <: Regularizer coefficient :: AbstractFloat end -immutable L1Regu <: Regularizer +struct L1Regu <: Regularizer coefficient :: AbstractFloat end From 6862609a1b56d7bb798189dd66703b27a98a1b9a Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Mon, 19 Nov 2018 23:09:43 -0500 Subject: [PATCH 06/24] my attempts before femtocleaner --- Manifest.toml | 252 +++++++++++++++++++++++++++++++++++++++++++ Project.toml | 9 ++ REQUIRE | 5 - src/compatibility.jl | 11 +- src/exception.jl | 2 +- src/utils/blas.jl | 26 ++--- src/utils/math.jl | 12 +-- src/utils/tensor.jl | 2 +- 8 files changed, 292 insertions(+), 27 deletions(-) create mode 100644 Manifest.toml create mode 100644 Project.toml delete mode 100644 REQUIRE diff --git a/Manifest.toml b/Manifest.toml new file mode 100644 index 0000000..da915a9 --- /dev/null +++ b/Manifest.toml @@ -0,0 +1,252 @@ +[[Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" + +[[BinDeps]] +deps = ["Compat", "Libdl", "SHA", "URIParser"] +git-tree-sha1 = "12093ca6cdd0ee547c39b1870e0c9c3f154d9ca9" +uuid = "9e28174c-4ba2-5203-b857-d8d62c4213ee" +version = "0.8.10" + +[[BinaryProvider]] +deps = ["Libdl", "Pkg", "SHA", "Test"] +git-tree-sha1 = "9930c1a6cd49d9fcd7218df6be417e6ae4f1468a" +uuid = "b99e7846-7c00-51b0-8f62-c81ae34c0232" +version = "0.5.2" + +[[Blosc]] +deps = ["BinaryProvider", "CMakeWrapper", "Compat", "Libdl"] +git-tree-sha1 = "71fb23581e1f0b0ae7be8ccf0ebfb3600e23ca41" +uuid = "a74b3585-a348-5f62-a45c-50e91977d574" +version = "0.5.1" + +[[BufferedStreams]] +deps = ["Compat", "Test"] +git-tree-sha1 = "5d55b9486590fdda5905c275bb21ce1f0754020f" +uuid = "e1450e63-4bb3-523b-b2a4-4ffa8c0fd77d" +version = "1.0.0" + +[[CMake]] +deps = ["BinDeps", "Libdl", "Test"] +git-tree-sha1 = "74853a75c26a4a73ac391ee26ee29ebeb5583d9f" +uuid = "631607c0-34d2-5d66-819e-eb0f9aa2061a" +version = "1.1.0" + +[[CMakeWrapper]] +deps = ["BinDeps", "CMake", "Libdl", "Parameters", "Test"] +git-tree-sha1 = "2b43d451639984e3571951cc687b8509b0a86c6d" +uuid 
= "d5fb7624-851a-54ee-a528-d3f3bac0b4a0" +version = "0.2.2" + +[[Compat]] +deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] +git-tree-sha1 = "ec61a16eed883ad0cfa002d7489b3ce6d039bb9a" +uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" +version = "1.4.0" + +[[Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" + +[[DelimitedFiles]] +deps = ["Mmap"] +uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" + +[[Distributed]] +deps = ["LinearAlgebra", "Random", "Serialization", "Sockets"] +uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" + +[[EzXML]] +deps = ["BinaryProvider", "Libdl", "Printf", "Test"] +git-tree-sha1 = "5623d1486bfaadd815f5c4ca501adda02b5337f1" +uuid = "8f5d6c58-4d21-5cfd-889c-e3ad7ee6a615" +version = "0.9.0" + +[[FileIO]] +deps = ["Pkg", "Random", "Test"] +git-tree-sha1 = "884066c07188cc0f57d0ebdcab6c9a09cc734dbc" +uuid = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" +version = "1.0.3" + +[[HDF5]] +deps = ["BinDeps", "Blosc", "Distributed", "Homebrew", "Libdl", "LinearAlgebra", "Mmap", "Pkg", "Test", "WinRPM"] +git-tree-sha1 = "8c3bcdb44db436cd20106e2381e1c1ac96aa0ee3" +uuid = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" +version = "0.10.2" + +[[HTTPClient]] +deps = ["Compat", "LibCURL"] +git-tree-sha1 = "161d5776ae8e585ac0b8c20fb81f17ab755b3671" +uuid = "0862f596-cf2d-50af-8ef4-f2be67dfa83f" +version = "0.2.1" + +[[Homebrew]] +deps = ["BinDeps", "InteractiveUtils", "JSON", "Libdl", "Test", "Unicode"] +git-tree-sha1 = "5582ec74f735cf8d12e562a2e65c47f34063bd51" +uuid = "d9be37ee-ecc9-5288-90f1-b9ca67657a75" +version = "0.7.0" + +[[InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" + +[[JLD]] +deps = ["Compat", "FileIO", "HDF5", "LegacyStrings", "Profile", "Random"] +git-tree-sha1 = "95fd5d7f129918a75d0535aaaf5b8e235e6e0b0b" +uuid = "4138dd39-2aa7-5051-a626-17a0bb65d9c8" +version = "0.9.1" + +[[JSON]] +deps = ["Dates", "Distributed", "Mmap", "Sockets", "Test", "Unicode"] +git-tree-sha1 = "fec8e4d433072731466d37ed0061b3ba7f70eeb9" +uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +version = "0.19.0" + +[[LegacyStrings]] +deps = ["Compat"] +git-tree-sha1 = "7cfb0bf378fab9ec57b393e3c56a419d6afab876" +uuid = "1b4a561d-cfcb-5daf-8433-43fcf8b4bea3" +version = "0.4.0" + +[[LibCURL]] +deps = ["BinaryProvider", "Compat", "Libdl", "Printf"] +git-tree-sha1 = "6339c87cb76923a3cf947fcd213cbc364355c9c9" +uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" +version = "0.4.1" + +[[LibExpat]] +deps = ["Compat"] +git-tree-sha1 = "fde352ec13479e2f90e57939da2440fb78c5e388" +uuid = "522f3ed2-3f36-55e3-b6df-e94fee9b0c07" +version = "0.5.0" + +[[LibGit2]] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" + +[[Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" + +[[Libz]] +deps = ["BufferedStreams", "Random", "Test"] +git-tree-sha1 = "d405194ffc0293c3519d4f7251ce51baac9cc871" +uuid = "2ec943e9-cfe8-584d-b93d-64dcb6d567b7" +version = "1.0.0" + +[[LinearAlgebra]] +deps = ["Libdl"] +uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" + +[[Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" + +[[Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" + +[[Memento]] +deps = ["Compat", "JSON", "Nullables", "Syslogs", "TimeZones"] +git-tree-sha1 = "c4ade3575bcc2c180cfa14cf8b3b63eebca51629" +uuid = "f28f55f0-a522-5efc-85c2-fe41dfb9b2d9" +version = 
"0.10.0" + +[[Mmap]] +uuid = "a63ad114-7e13-5084-954f-fe012c677804" + +[[Mocking]] +deps = ["Compat", "Dates"] +git-tree-sha1 = "4bf69aaf823b119b034e091e16b18311aa191663" +uuid = "78c3b35d-d492-501b-9361-3d52fe80e533" +version = "0.5.7" + +[[Nullables]] +deps = ["Compat"] +git-tree-sha1 = "ae1a63457e14554df2159b0b028f48536125092d" +uuid = "4d1e1d77-625e-5b40-9113-a560ec7a8ecd" +version = "0.0.8" + +[[OrderedCollections]] +deps = ["Random", "Serialization", "Test"] +git-tree-sha1 = "85619a3f3e17bb4761fe1b1fd47f0e979f964d5b" +uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +version = "1.0.2" + +[[Parameters]] +deps = ["Markdown", "OrderedCollections", "REPL", "Test"] +git-tree-sha1 = "40f540ec96e50c0b2b9efdb11b5e4d0c63f90923" +uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a" +version = "0.10.1" + +[[Pkg]] +deps = ["Dates", "LibGit2", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" + +[[Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" + +[[Profile]] +deps = ["Printf"] +uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79" + +[[REPL]] +deps = ["InteractiveUtils", "Markdown", "Sockets"] +uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" + +[[Random]] +deps = ["Serialization"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" + +[[SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" + +[[Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" + +[[SharedArrays]] +deps = ["Distributed", "Mmap", "Random", "Serialization"] +uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" + +[[Sockets]] +uuid = "6462fe0b-24de-5631-8697-dd941f90decc" + +[[SparseArrays]] +deps = ["LinearAlgebra", "Random"] +uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + +[[Statistics]] +deps = ["LinearAlgebra", "SparseArrays"] +uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" + +[[Syslogs]] +deps = ["Compat", "Nullables"] +git-tree-sha1 = "d3e512a044cc8873c741d88758f8e1888c7c47d3" +uuid = "cea106d9-e007-5e6c-ad93-58fe2094e9c4" +version = "0.2.0" + +[[Test]] +deps = ["Distributed", "InteractiveUtils", "Logging", "Random"] +uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[[TimeZones]] +deps = ["Compat", "EzXML", "Mocking", "Nullables"] +git-tree-sha1 = "4a4ab113913e19ad62b67e6c5c056509eac00c19" +uuid = "f269a46b-ccf7-5d73-abea-4c690281aa53" +version = "0.8.2" + +[[URIParser]] +deps = ["Test", "Unicode"] +git-tree-sha1 = "6ddf8244220dfda2f17539fa8c9de20d6c575b69" +uuid = "30578b45-9adc-5946-b283-645ec420af67" +version = "0.4.0" + +[[UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" + +[[Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" + +[[WinRPM]] +deps = ["BinDeps", "Compat", "HTTPClient", "LibExpat", "Libdl", "Libz", "URIParser"] +git-tree-sha1 = "2a889d320f3b77d17c037f295859fe570133cfbf" +uuid = "c17dfb99-b4f7-5aad-8812-456da1ad7187" +version = "0.4.2" diff --git a/Project.toml b/Project.toml new file mode 100644 index 0000000..6912f1b --- /dev/null +++ b/Project.toml @@ -0,0 +1,9 @@ +name = "Mocha" +uuid = "8ac9dfb8-ec70-11e8-02ea-0ddc18c31797" + +[deps] +Compat = "34da2185-b29b-5c13-b0c7-acf172513d20" +HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" +JLD = "4138dd39-2aa7-5051-a626-17a0bb65d9c8" +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +Memento = "f28f55f0-a522-5efc-85c2-fe41dfb9b2d9" diff --git a/REQUIRE b/REQUIRE deleted file mode 100644 index 1e859bf..0000000 --- a/REQUIRE +++ /dev/null @@ -1,5 +0,0 @@ -julia 0.6 -HDF5 0.7.0 -JLD 0.6.6 -Compat 0.9.5 -Memento 0.5 diff --git a/src/compatibility.jl 
b/src/compatibility.jl index 624b2ec..99bcfcc 100644 --- a/src/compatibility.jl +++ b/src/compatibility.jl @@ -1,5 +1,6 @@ using Compat + if VERSION < v"0.3.3" function blasfunc(name::Symbol) string(name) @@ -8,11 +9,19 @@ elseif VERSION < v"0.5.0-dev+1915" function blasfunc(name::Symbol) Base.blasfunc(name) end -else +elseif VERSION < v"0.7.0-beta2.199" function blasfunc(name::Symbol) str_name = string(name) fnc_symb = eval(:(Base.BLAS.@blasfunc $str_name)) fnc_name = string(fnc_symb) return fnc_name end +else + function blasfunc(name::Symbol) + str_name = string(name) + fnc_symb = eval(:(((Compat.LinearAlgebra).BLAS).@blasfunc($str_name))) + fnc_name = string(fnc_symb) + return fnc_name + end end +export blasfunc \ No newline at end of file diff --git a/src/exception.jl b/src/exception.jl index 41f99d2..2574582 100644 --- a/src/exception.jl +++ b/src/exception.jl @@ -1,6 +1,6 @@ export TopologyError -type TopologyError <: Exception +struct TopologyError <: Exception desc :: AbstractString end Base.showerror(io::IO, e::TopologyError) = print(io, "Illegal Network Topology: ", e.desc) diff --git a/src/utils/blas.jl b/src/utils/blas.jl index cc875d3..ac71b80 100644 --- a/src/utils/blas.jl +++ b/src/utils/blas.jl @@ -1,20 +1,20 @@ export RawBLAS module RawBLAS -using Base.LinAlg # force built-in BLAS library initialization -using ..Mocha.blasfunc +using LinearAlgebra # force built-in BLAS library initialization +using ..Mocha: blasfunc using Compat for (gemm, elty) in ((:dgemm_, Float64), (:sgemm_, Float32)) @eval begin function gemm!(transA::Char, transB::Char, M::Int, N::Int, K::Int, alpha::$elty, A, lda, B, ldb, beta::$elty, C, ldc) - ccall(($(blasfunc(gemm)), Base.libblas_name), Void, - (Ptr{UInt8}, Ptr{UInt8}, Ptr{Base.LinAlg.BlasInt}, Ptr{Base.LinAlg.BlasInt}, - Ptr{Base.LinAlg.BlasInt}, Ptr{$elty}, Ptr{$elty}, Ptr{Base.LinAlg.BlasInt}, - Ptr{$elty}, Ptr{Base.LinAlg.BlasInt}, Ptr{$elty}, Ptr{$elty}, - Ptr{Base.LinAlg.BlasInt}), - &transA, &transB, &M, &N, &K, &alpha, A, &lda, B, &ldb, &beta, C, &ldc) + ccall(($(blasfunc(gemm)), Base.libblas_name), Nothing, + (Ptr{UInt8}, Ptr{UInt8}, Ptr{LinearAlgebra.BlasInt}, Ptr{LinearAlgebra.BlasInt}, + Ptr{LinearAlgebra.BlasInt}, Ptr{$elty}, Ptr{$elty}, Ptr{LinearAlgebra.BlasInt}, + Ptr{$elty}, Ptr{LinearAlgebra.BlasInt}, Ptr{$elty}, Ptr{$elty}, + Ptr{LinearAlgebra.BlasInt}), + Ref(transA), Ref(transB), Ref(M), Ref(N), Ref(K), Ref(alpha), A, Ref(lda), B, Ref(ldb), Ref(beta), C, Ref(ldc)) end function gemm!(transA::Char, transB::Char, M::Int, N::Int, K::Int, alpha::$elty, @@ -30,11 +30,11 @@ end for (gemv, elty) in ((:dgemv_, Float64), (:sgemv_, Float32)) @eval begin function gemv!(trans::Char, M::Int, N::Int, alpha::$elty, A, lda, x, incx, beta::$elty, y, incy) - ccall(($(blasfunc(gemv)), Base.libblas_name), Void, - (Ptr{UInt8}, Ptr{Base.LinAlg.BlasInt}, Ptr{Base.LinAlg.BlasInt}, Ptr{$elty}, - Ptr{$elty}, Ptr{Base.LinAlg.BlasInt}, Ptr{$elty}, Ptr{Base.LinAlg.BlasInt}, - Ptr{$elty}, Ptr{$elty}, Ptr{Base.LinAlg.BlasInt}), - &trans, &M, &N, &alpha, A, &lda, x, &incx, &beta, y, &incy) + ccall(($(blasfunc(gemv)), Base.libblas_name), Nothing, + (Ptr{UInt8}, Ptr{LinearAlgebra.BlasInt}, Ptr{LinearAlgebra.BlasInt}, Ptr{$elty}, + Ptr{$elty}, Ptr{LinearAlgebra.BlasInt}, Ptr{$elty}, Ptr{LinearAlgebra.BlasInt}, + Ptr{$elty}, Ptr{$elty}, Ptr{LinearAlgebra.BlasInt}), + Ref(trans), Ref(M), Ref(N), Ref(alpha), A, Ref(lda), x, Ref(incx), Ref(beta), y, Ref(incy)) end function gemv!(trans::Char, M::Int, N::Int, alpha::$elty, A, x, beta::$elty, y) lda = M diff --git 
a/src/utils/math.jl b/src/utils/math.jl index a0f075d..d9f86b1 100644 --- a/src/utils/math.jl +++ b/src/utils/math.jl @@ -2,7 +2,7 @@ export Vec module Vec # X[i] += a -function add_scal!{T}(X::Array{T}, a) +function add_scal!(X::Array{T}, a) where {T} leng = length(X) a = convert(eltype(X), a) @simd for i = 1:leng @@ -11,7 +11,7 @@ function add_scal!{T}(X::Array{T}, a) end # X[i] *= a -function mul_scal!{T}(X::Array{T}, a) +function mul_scal!(X::Array{T}, a) where {T} leng = length(X) a = convert(eltype(X), a) @simd for i = 1:leng @@ -20,7 +20,7 @@ function mul_scal!{T}(X::Array{T}, a) end # X[i] *= Y[i] -function mul!{T}(X::Array{T}, Y::Array{T}) +function mul!(X::Array{T}, Y::Array{T}) where {T} leng = length(X) @simd for i = 1:leng @inbounds X[i] *= Y[i] @@ -28,14 +28,14 @@ function mul!{T}(X::Array{T}, Y::Array{T}) end # X[i] = X[i] / Y[i] -function div!{T}(X::Array{T}, Y::Array{T}) +function div!(X::Array{T}, Y::Array{T}) where {T} leng = length(X) @simd for i = 1:leng @inbounds X[i] /= Y[i] end end # Y[i] = X[i] / Y[i] -function div2!{T}(X::Array{T}, Y::Array{T}) +function div2!(X::Array{T}, Y::Array{T}) where {T} leng = length(X) @simd for i = 1:leng @inbounds Y[i] = X[i] / Y[i] @@ -43,7 +43,7 @@ function div2!{T}(X::Array{T}, Y::Array{T}) end # X[i] = X[i]^p -function pow!{T}(X::Array{T}, p::Number) +function pow!(X::Array{T}, p::Number) where {T} leng = length(X) @simd for i = 1:leng @inbounds X[i] = X[i]^p diff --git a/src/utils/tensor.jl b/src/utils/tensor.jl index abbe1d6..f9db8f5 100644 --- a/src/utils/tensor.jl +++ b/src/utils/tensor.jl @@ -2,7 +2,7 @@ export split_dims # Split the dimension of a ND-tensor into 3 parts: # (dim_pre, dim_mid, dim_post) -function split_dims{T}(tensor::T, dim::Int) +function split_dims(tensor::T, dim::Int) where {T} dims = size(tensor) dim_pre ::Int = prod(dims[1:dim-1]) dim_mid ::Int = dims[dim] From d2c36c176d6a07bad82f4b08236e44af4ed368a5 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Mon, 19 Nov 2018 23:59:55 -0500 Subject: [PATCH 07/24] Manual midway point --- Project.toml | 1 + benchmarks/native-im2col/im2col-bm.jl | 2 +- benchmarks/native-pooling/pooling-bm.jl | 2 +- src/Mocha.jl | 2 +- src/backend.jl | 2 +- src/blob.jl | 26 ++++---- src/constraints.jl | 4 +- src/cuda/blob.jl | 4 +- src/cuda/cublas.jl | 44 ++++++------- src/cuda/cuda.jl | 42 ++++++------- src/cuda/cudnn.jl | 82 ++++++++++++------------- src/cuda/layers/hdf5-data.jl | 4 +- src/cuda/utils/math.jl | 14 ++--- src/data-transformers.jl | 4 +- src/layers/hdf5-data.jl | 2 +- src/layers/memory-data.jl | 2 +- src/layers/pooling/native-impl.jl | 16 ++--- src/macros.jl | 10 +-- src/neurons.jl | 14 ++--- src/parameter.jl | 6 +- src/pooling-functions.jl | 6 +- src/solvers.jl | 2 +- src/utils/im2col-native.jl | 8 +-- src/utils/im2col.jl | 16 ++--- src/utils/ref-count.jl | 2 +- test/layers/lrn.jl | 12 ++-- test/layers/random-normal.jl | 4 +- 27 files changed, 167 insertions(+), 166 deletions(-) diff --git a/Project.toml b/Project.toml index 6912f1b..c93c7e6 100644 --- a/Project.toml +++ b/Project.toml @@ -7,3 +7,4 @@ HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" JLD = "4138dd39-2aa7-5051-a626-17a0bb65d9c8" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Memento = "f28f55f0-a522-5efc-85c2-fe41dfb9b2d9" +Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" diff --git a/benchmarks/native-im2col/im2col-bm.jl b/benchmarks/native-im2col/im2col-bm.jl index de8ff91..c78e3a3 100644 --- a/benchmarks/native-im2col/im2col-bm.jl +++ b/benchmarks/native-im2col/im2col-bm.jl @@ -54,7 +54,7 @@ 
function im2col_native(img::Array{Float64}, col::Array{Float64}, width::Int, hei pad_w, pad_h = pad stride_w, stride_h = stride - ccall(func_handle, Void, + ccall(func_handle, Nothing, (Ptr{Float64},Ptr{Float64}, Cint, Cint, Cint, Cint, Cint, # kernel Cint, Cint, # pad diff --git a/benchmarks/native-pooling/pooling-bm.jl b/benchmarks/native-pooling/pooling-bm.jl index d22d783..30ff0cb 100644 --- a/benchmarks/native-pooling/pooling-bm.jl +++ b/benchmarks/native-pooling/pooling-bm.jl @@ -77,7 +77,7 @@ function pooling_native(input::Array, output::Array, mask::Array, kernel, pad, s pooled_width = int(ceil(float(width +2*pad[1]-kernel[1]) / stride[1]))+1 pooled_height = int(ceil(float(height+2*pad[2]-kernel[2]) / stride[2]))+1 - ccall(func_handle, Void, (Ptr{Float64}, Ptr{Float64}, Ptr{Csize_t}, Cint, Cint, Cint, Cint, + ccall(func_handle, Nothing, (Ptr{Float64}, Ptr{Float64}, Ptr{Csize_t}, Cint, Cint, Cint, Cint, Cint, Cint, # pooled_width, pooled_height Cint, Cint, Cint, Cint, Cint, Cint, # kernel, pad, stride ), input, output, mask, width, height, channels, num, pooled_width, pooled_height, diff --git a/src/Mocha.jl b/src/Mocha.jl index 8b2d953..ed0d486 100644 --- a/src/Mocha.jl +++ b/src/Mocha.jl @@ -1,6 +1,6 @@ module Mocha -using Memento +using Memento, Printf const logger = getlogger(Mocha) diff --git a/src/backend.jl b/src/backend.jl index 0063eb6..7e8c9de 100644 --- a/src/backend.jl +++ b/src/backend.jl @@ -32,7 +32,7 @@ function registry_get(backend::Backend, key::AbstractString) return get(backend.param_registry, key, nothing) end -type CPUBackend <: Backend +struct CPUBackend <: Backend param_registry :: ParameterRegistry CPUBackend() = new(ParameterRegistry()) diff --git a/src/blob.jl b/src/blob.jl index 26b114b..3a55c7b 100644 --- a/src/blob.jl +++ b/src/blob.jl @@ -21,11 +21,11 @@ export make_blob, make_zero_blob, reshape_blob # and mainly for components that do not need # to know the underlying backend (e.g. Filler). 
############################################################ -function eltype{T}(blob :: Blob{T}) +function eltype(blob :: Blob{T}) where {T} T end -function ndims{T,N}(blob :: Blob{T,N}) +function ndims(blob :: Blob{T,N}) where {T,N} N end function size(blob :: Blob) # should return the size of data @@ -34,7 +34,7 @@ end function destroy(blob :: Blob) # should destroy the blob error("destroy not implemented for type $(typeof(blob))") end -function size{T,N}(blob :: Blob{T,N}, dim :: Int) +function size(blob :: Blob{T,N}, dim :: Int) where {T,N} if dim < 0 dim = N+1 + dim end @@ -89,7 +89,7 @@ end ############################################################ # A Dummy Blob type holding nothing ############################################################ -type NullBlob <: Blob{Void, 0} +struct NullBlob <: Blob{Nothing, 0} end function fill!(dst :: NullBlob, val) # do nothing @@ -109,7 +109,7 @@ function make_blob(backend::Backend, data::Array) copy!(blob, data) return blob end -function make_zero_blob{N}(backend::Backend, data_type::Type, dims::NTuple{N,Int}) +function make_zero_blob(backend::Backend, data_type::Type, dims::NTuple{N,Int}) where {N} blob = make_blob(backend, data_type, dims) erase!(blob) return blob @@ -128,13 +128,13 @@ end struct CPUBlob{T <: AbstractFloat, N} <: Blob{T, N} data :: AbstractArray{T, N} end -CPUBlob{N}(t :: Type, dims::NTuple{N,Int}) = CPUBlob(Array{t}(dims)) +CPUBlob(t :: Type, dims::NTuple{N,Int}) where {N} = CPUBlob(Array{t}(dims)) -function make_blob{N}(backend::CPUBackend, data_type::Type, dims::NTuple{N,Int}) +function make_blob(backend::CPUBackend, data_type::Type, dims::NTuple{N,Int}) where {N} return CPUBlob(data_type, dims) end -function reshape_blob{T,N1,N2}(backend::CPUBackend, blob::CPUBlob{T,N1}, dims::NTuple{N2,Int}) +function reshape_blob(backend::CPUBackend, blob::CPUBlob{T,N1}, dims::NTuple{N2,Int}) where {T,N1,N2} @assert prod(dims) == length(blob) return CPUBlob{T,N2}(reshape(blob.data, dims)) end @@ -144,21 +144,21 @@ end size(blob::CPUBlob) = size(blob.data) -function copy!{T}(dst :: Array{T}, src :: CPUBlob{T}) +function copy!(dst :: Array{T}, src :: CPUBlob{T}) where {T} @assert length(dst) == length(src) dst[:] = src.data[:] end -function copy!{T}(dst :: CPUBlob{T}, src :: Array{T}) +function copy!(dst :: CPUBlob{T}, src :: Array{T}) where {T} @assert length(dst) == length(src) dst.data[:] = src[:] end -function copy!{T}(dst :: CPUBlob{T}, src :: CPUBlob{T}) +function copy!(dst :: CPUBlob{T}, src :: CPUBlob{T}) where {T} dst.data[:] = src.data[:] end -function fill!{T}(dst :: CPUBlob{T}, src) +function fill!(dst :: CPUBlob{T}, src) where {T} fill!(dst.data, src) end -function randn!{T}(dst :: CPUBlob{T}) +function randn!(dst :: CPUBlob{T}) where {T} randn!(dst.data) end diff --git a/src/constraints.jl b/src/constraints.jl index 26c3f6a..9c0fefd 100644 --- a/src/constraints.jl +++ b/src/constraints.jl @@ -26,8 +26,8 @@ end # L2 norm constraint on the weights ############################################################ -function apply_l2_cons!{T <: AbstractFloat}(backend::CPUBackend, blob::CPUBlob{T}, - threshold::AbstractFloat, ninputs::Int, nunits::Int) +function apply_l2_cons!(backend::CPUBackend, blob::CPUBlob{T}, + threshold::AbstractFloat, ninputs::Int, nunits::Int) where {T <: AbstractFloat} param = reshape(blob.data, (ninputs, nunits)) # we constrain each column vector for i = 1:nunits diff --git a/src/cuda/blob.jl b/src/cuda/blob.jl index 7c19309..bf8a15b 100644 --- a/src/cuda/blob.jl +++ b/src/cuda/blob.jl @@ -27,7 +27,7 @@ 
function copy!{T}(dst :: Array{T}, src :: CuTensorBlob{T}) end function copy!{T}(dst :: CuTensorBlob{T}, src :: CuTensorBlob{T}) @assert length(dst) == length(src) - @CUDA.cucall(:cuMemcpy, (Ptr{Void}, Ptr{Void}, Cint), dst.ptr.p, src.ptr.p, length(dst)*sizeof(T)) + @CUDA.cucall(:cuMemcpy, (Ptr{Nothing}, Ptr{Nothing}, Cint), dst.ptr.p, src.ptr.p, length(dst)*sizeof(T)) end function fill!{T}(dst :: CuTensorBlob{T}, val) val_vec = Array{T}(length(dst)) @@ -35,7 +35,7 @@ function fill!{T}(dst :: CuTensorBlob{T}, val) copy!(dst, val_vec) end function erase!{T}(dst :: CuTensorBlob{T}) - @CUDA.cucall(:cuMemsetD8_v2, (Ptr{Void}, Cuchar, Csize_t), dst.ptr.p, 0, length(dst)*sizeof(T)) + @CUDA.cucall(:cuMemsetD8_v2, (Ptr{Nothing}, Cuchar, Csize_t), dst.ptr.p, 0, length(dst)*sizeof(T)) end function make_blob{N}(backend::GPUBackend, data_type::Type, dims::NTuple{N,Int}) diff --git a/src/cuda/cublas.jl b/src/cuda/cublas.jl index 479b737..c34e6a5 100644 --- a/src/cuda/cublas.jl +++ b/src/cuda/cublas.jl @@ -54,8 +54,8 @@ macro cublascall(fv, argtypes, args...) end end -const Handle = Ptr{Void} -const StreamHandle = Ptr{Void} +const Handle = Ptr{Nothing} +const StreamHandle = Ptr{Nothing} function create() handle = Handle[0] @@ -78,17 +78,17 @@ end ############################################################ # Copy a vector from host to device ############################################################ -function set_vector(n::Int, elem_size::Int, src::Ptr{Void}, incx::Int, dest::Ptr{Void}, incy::Int) - @cublascall(:cublasSetVector, (Cint, Cint, Ptr{Void}, Cint, Ptr{Void}, Cint), +function set_vector(n::Int, elem_size::Int, src::Ptr{Nothing}, incx::Int, dest::Ptr{Nothing}, incy::Int) + @cublascall(:cublasSetVector, (Cint, Cint, Ptr{Nothing}, Cint, Ptr{Nothing}, Cint), n, elem_size, src, incx, dest, incy) end -function set_vector(n::Int, elem_size::Int, src::Ptr{Void}, incx::Int, dest::CuPtr, incy::Int) - set_vector(n, elem_size, src, incx, Base.unsafe_convert(Ptr{Void}, dest.p), incy) +function set_vector(n::Int, elem_size::Int, src::Ptr{Nothing}, incx::Int, dest::CuPtr, incy::Int) + set_vector(n, elem_size, src, incx, Base.unsafe_convert(Ptr{Nothing}, dest.p), incy) end function set_vector{T}(src::Array{T}, incx::Int, dest::CuPtr, incy::Int) elem_size = sizeof(T) n = length(src) - src_buf = convert(Ptr{Void}, pointer(src)) + src_buf = convert(Ptr{Nothing}, pointer(src)) set_vector(n, elem_size, src_buf, incx, dest, incy) end set_vector{T}(src::Array{T}, dest::CuPtr) = set_vector(src, 1, dest, 1) @@ -96,14 +96,14 @@ set_vector{T}(src::Array{T}, dest::CuPtr) = set_vector(src, 1, dest, 1) ############################################################ # Copy a vector from device to host ############################################################ -function get_vector(n::Int, elem_size::Int, src::CuPtr, incx::Int, dest::Ptr{Void}, incy::Int) - @cublascall(:cublasGetVector, (Cint, Cint, Ptr{Void}, Cint, Ptr{Void}, Cint), - n, elem_size, Base.unsafe_convert(Ptr{Void}, src.p), incx, dest, incy) +function get_vector(n::Int, elem_size::Int, src::CuPtr, incx::Int, dest::Ptr{Nothing}, incy::Int) + @cublascall(:cublasGetVector, (Cint, Cint, Ptr{Nothing}, Cint, Ptr{Nothing}, Cint), + n, elem_size, Base.unsafe_convert(Ptr{Nothing}, src.p), incx, dest, incy) end function get_vector{T}(src::CuPtr, incx::Int, dest::Array{T}, incy::Int) elem_size = sizeof(T) n = length(dest) - dest_buf = convert(Ptr{Void}, pointer(dest)) + dest_buf = convert(Ptr{Nothing}, pointer(dest)) get_vector(n, elem_size, src, incx, dest_buf, incy) end 
get_vector{T}(src::CuPtr, dest::Array{T}) = get_vector(src, 1, dest, 1) @@ -116,9 +116,9 @@ for (fname, elty) in ((:cublasSscal_v2, :Float32), (:cublasDscal_v2, :Float64)) @eval begin function scal(handle::Handle, n::Int, alpha::$elty, x, incx::Int) - x = Base.unsafe_convert(Ptr{Void}, x) + x = Base.unsafe_convert(Ptr{Nothing}, x) alpha_box = $elty[alpha] - @cublascall($(string(fname)), (Handle, Cint, Ptr{Void}, Ptr{Void}, Cint), + @cublascall($(string(fname)), (Handle, Cint, Ptr{Nothing}, Ptr{Nothing}, Cint), handle, n, alpha_box, x, incx) end function scal(handle::Handle, n::Int, alpha::$elty, x::CuPtr, incx::Int) @@ -134,10 +134,10 @@ for (fname, elty) in ((:cublasSaxpy_v2, :Float32), (:cublasDaxpy_v2, :Float64)) @eval begin function axpy(handle::Handle, n::Int, alpha::$elty, x, incx::Int, y, incy::Int) - x = Base.unsafe_convert(Ptr{Void}, x) - y = Base.unsafe_convert(Ptr{Void}, y) + x = Base.unsafe_convert(Ptr{Nothing}, x) + y = Base.unsafe_convert(Ptr{Nothing}, y) alpha_box = $elty[alpha] - @cublascall($(string(fname)), (Handle, Cint, Ptr{Void}, Ptr{Void}, Cint, Ptr{Void}, Cint), + @cublascall($(string(fname)), (Handle, Cint, Ptr{Nothing}, Ptr{Nothing}, Cint, Ptr{Nothing}, Cint), handle, n, alpha_box, x, incx, y, incy) end function axpy(handle::Handle, n::Int, alpha::$elty, x::CuPtr, incx::Int, y::CuPtr, incy::Int) @@ -154,7 +154,7 @@ for (fname, elty) in ((:cublasSdot_v2, :Float32), @eval begin function dot(handle::Handle, ::Type{$elty}, n::Int, x::CuPtr, incx::Int, y::CuPtr, incy::Int) result = $elty[0] - @cublascall($(string(fname)), (Handle, Cint, Ptr{Void}, Cint, Ptr{Void}, Cint, Ptr{Void}), + @cublascall($(string(fname)), (Handle, Cint, Ptr{Nothing}, Cint, Ptr{Nothing}, Cint, Ptr{Nothing}), handle, n, x.p, incx, y.p, incy, result) return result[1] end @@ -171,9 +171,9 @@ for (fname, elty) in ((:cublasScopy_v2, :Float32), (:cublasDcopy_v2, :Float64)) @eval begin function copy(handle::Handle, ::Type{$elty}, n::Int, x, incx::Int, y, incy::Int) - x = Base.unsafe_convert(Ptr{Void}, (x)) - y = Base.unsafe_convert(Ptr{Void}, (y)) - @cublascall($(string(fname)), (Handle, Cint, Ptr{Void}, Cint, Ptr{Void}, Cint), + x = Base.unsafe_convert(Ptr{Nothing}, (x)) + y = Base.unsafe_convert(Ptr{Nothing}, (y)) + @cublascall($(string(fname)), (Handle, Cint, Ptr{Nothing}, Cint, Ptr{Nothing}, Cint), handle, n, x, incx, y, incy) end end @@ -203,8 +203,8 @@ for (fname, elty) in ((:cublasSgemm_v2, :Float32), @eval begin function gemm_impl(handle::Handle, trans_a::Int, trans_b::Int, m::Int, n::Int, k::Int, alpha_box::Array{$elty}, A::CuPtr, lda::Int, B::CuPtr, ldb::Int, beta_box::Array{$elty}, C::CuPtr, ldc::Int) - @cublascall($(string(fname)), (Handle, Cint,Cint, Cint,Cint,Cint, Ptr{Void}, - Ptr{Void},Cint, Ptr{Void},Cint, Ptr{Void}, Ptr{Void},Cint), + @cublascall($(string(fname)), (Handle, Cint,Cint, Cint,Cint,Cint, Ptr{Nothing}, + Ptr{Nothing},Cint, Ptr{Nothing},Cint, Ptr{Nothing}, Ptr{Nothing},Cint), handle, trans_a, trans_b, m, n, k, alpha_box, A.p, lda, B.p, ldb, beta_box, C.p, ldc) end end diff --git a/src/cuda/cuda.jl b/src/cuda/cuda.jl index a615e83..c84077b 100644 --- a/src/cuda/cuda.jl +++ b/src/cuda/cuda.jl @@ -117,7 +117,7 @@ struct CuDevice end struct CuContext - handle::Ptr{Void} + handle::Ptr{Nothing} end const CTX_SCHED_AUTO = 0x00 @@ -128,21 +128,21 @@ const CTX_MAP_HOST = 0x08 const CTX_LMEM_RESIZE_TO_MAX = 0x10 function create_context(dev::CuDevice, flags::Integer) - a = Array{Ptr{Void}}(1) - @cucall(:cuCtxCreate_v2, (Ptr{Ptr{Void}}, Cuint, Cint), a, flags, dev.handle) + a = 
Array{Ptr{Nothing}}(1) + @cucall(:cuCtxCreate_v2, (Ptr{Ptr{Nothing}}, Cuint, Cint), a, flags, dev.handle) return CuContext(a[1]) end create_context(dev::CuDevice) = create_context(dev, 0) function destroy(ctx::CuContext) - @cucall(:cuCtxDestroy_v2, (Ptr{Void},), ctx.handle) + @cucall(:cuCtxDestroy_v2, (Ptr{Nothing},), ctx.handle) end ############################################################ # Memory allocation ############################################################ -const CUdeviceptr = Ptr{Void} +const CUdeviceptr = Ptr{Nothing} type CuPtr p::CUdeviceptr @@ -168,21 +168,21 @@ end # CUDA streams ############################################################ struct CuStream - handle::Ptr{Void} + handle::Ptr{Nothing} blocking::Bool priority::Int end function null_stream() - CuStream(convert(Ptr{Void}, 0), true, 0) + CuStream(convert(Ptr{Nothing}, 0), true, 0) end function destroy(s::CuStream) - @cucall(:cuStreamDestroy_v2, (Ptr{Void},), s.handle) + @cucall(:cuStreamDestroy_v2, (Ptr{Nothing},), s.handle) end function synchronize(s::CuStream) - @cucall(:cuStreamSynchronize, (Ptr{Void},), s.handle) + @cucall(:cuStreamSynchronize, (Ptr{Nothing},), s.handle) end @@ -190,26 +190,26 @@ end # PTX Module and Function ############################################################ struct CuModule - handle::Ptr{Void} + handle::Ptr{Nothing} function CuModule(filename::AbstractString) - a = Array{Ptr{Void}}(1) - @cucall(:cuModuleLoad, (Ptr{Ptr{Void}}, Ptr{Cchar}), a, filename) + a = Array{Ptr{Nothing}}(1) + @cucall(:cuModuleLoad, (Ptr{Ptr{Nothing}}, Ptr{Cchar}), a, filename) new(a[1]) end end function unload(md::CuModule) - @cucall(:cuModuleUnload, (Ptr{Void},), md.handle) + @cucall(:cuModuleUnload, (Ptr{Nothing},), md.handle) end struct CuFunction - handle::Ptr{Void} + handle::Ptr{Nothing} function CuFunction(md::CuModule, name::String) - a = Array{Ptr{Void}}(1) - @cucall(:cuModuleGetFunction, (Ptr{Ptr{Void}}, Ptr{Void}, Ptr{Cchar}), + a = Array{Ptr{Nothing}}(1) + @cucall(:cuModuleGetFunction, (Ptr{Ptr{Nothing}}, Ptr{Nothing}, Ptr{Cchar}), a, md.handle, name) new(a[1]) end @@ -252,7 +252,7 @@ function launch(f::CuFunction, grid::CuDim, block::CuDim, args::Tuple; shmem_byt kernel_args = [cubox(arg) for arg in args] @cucall(:cuLaunchKernel, ( - Ptr{Void}, # function + Ptr{Nothing}, # function Cuint, # grid dim x Cuint, # grid dim y Cuint, # grid dim z @@ -260,10 +260,10 @@ function launch(f::CuFunction, grid::CuDim, block::CuDim, args::Tuple; shmem_byt Cuint, # block dim y Cuint, # block dim z Cuint, # shared memory bytes, - Ptr{Void}, # stream - Ptr{Ptr{Void}}, # kernel parameters, - Ptr{Ptr{Void}}), # extra parameters - f.handle, gx, gy, gz, tx, ty, tz, shmem_bytes, stream.handle, kernel_args, Ptr{Ptr{Void}}(0)) + Ptr{Nothing}, # stream + Ptr{Ptr{Nothing}}, # kernel parameters, + Ptr{Ptr{Nothing}}), # extra parameters + f.handle, gx, gy, gz, tx, ty, tz, shmem_bytes, stream.handle, kernel_args, Ptr{Ptr{Nothing}}(0)) end end # module CUDA diff --git a/src/cuda/cudnn.jl b/src/cuda/cudnn.jl index 600c03e..518c304 100644 --- a/src/cuda/cudnn.jl +++ b/src/cuda/cudnn.jl @@ -56,8 +56,8 @@ macro cudnncall(fv, argtypes, args...) 
end end -const Handle = Ptr{Void} -const StreamHandle = Ptr{Void} +const Handle = Ptr{Nothing} +const StreamHandle = Ptr{Nothing} function create() handle = Handle[0] @@ -77,10 +77,10 @@ function get_stream(handle::Handle) end # Data structures to represent Image/Filter and the Neural Network Layer -const Tensor4dDescriptor = Ptr{Void} -const ConvolutionDescriptor = Ptr{Void} -const PoolingDescriptor = Ptr{Void} -const FilterDescriptor = Ptr{Void} +const Tensor4dDescriptor = Ptr{Nothing} +const ConvolutionDescriptor = Ptr{Nothing} +const PoolingDescriptor = Ptr{Nothing} +const FilterDescriptor = Ptr{Nothing} const CUDNN_DATA_FLOAT = 0 const CUDNN_DATA_DOUBLE = 1 @@ -157,7 +157,7 @@ function destroy_tensor4d_descriptor(desc :: Tensor4dDescriptor) end function transform_tensor4d(handle::Handle, src_desc::Tensor4dDescriptor, src::CuPtr, dest_desc::Tensor4dDescriptor, dest::CuPtr) - @cudnncall(:cudnnTransformTensor4d, (Handle, Tensor4dDescriptor, Ptr{Void}, Tensor4dDescriptor, Ptr{Void}), + @cudnncall(:cudnnTransformTensor4d, (Handle, Tensor4dDescriptor, Ptr{Nothing}, Tensor4dDescriptor, Ptr{Nothing}), handle, src_desc, src.p, dest_desc, dest.p) end @@ -170,7 +170,7 @@ function add_tensor{T<:AbstractFloat}(handle::Handle, alpha::T, alpha_ptr = T[alpha] beta_ptr = T[beta] - @cudnncall(:cudnnAddTensor, (Handle, Ptr{Void}, Tensor4dDescriptor, Ptr{Void}, Ptr{Void}, Tensor4dDescriptor, Ptr{Void}), + @cudnncall(:cudnnAddTensor, (Handle, Ptr{Nothing}, Tensor4dDescriptor, Ptr{Nothing}, Ptr{Nothing}, Tensor4dDescriptor, Ptr{Nothing}), handle, alpha_ptr, bias_desc, bias.p, beta_ptr, srcdst_desc, srcdst.p) end @@ -178,7 +178,7 @@ function set_tensor4d{T<:AbstractFloat}(handle::Handle, desc::Tensor4dDescriptor @assert typeof(val) == get_tensor4d_descriptor(desc)[0] val_ptr = T[val] - @cudnncall(:cudnnSetTensor4d, (Handle, Tensor4dDescriptor, Ptr{Void}, Ptr{Void}), + @cudnncall(:cudnnSetTensor4d, (Handle, Tensor4dDescriptor, Ptr{Nothing}, Ptr{Nothing}), handle, desc, data.p, val_ptr) end @@ -316,10 +316,10 @@ function convolution_forward{T<:AbstractFloat}(handle::Handle, alpha::T, src_des alpha_ptr = T[alpha] beta_ptr = T[beta] @assert CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM <= algo <= CUDNN_CONVOLUTION_FWD_ALGO_FFT - @cudnncall(:cudnnConvolutionForward, (Handle, Ptr{Void}, Tensor4dDescriptor, Ptr{Void}, - FilterDescriptor, Ptr{Void}, ConvolutionDescriptor, - Cint, Ptr{Void}, Csize_t, Ptr{Void}, - Tensor4dDescriptor, Ptr{Void}), + @cudnncall(:cudnnConvolutionForward, (Handle, Ptr{Nothing}, Tensor4dDescriptor, Ptr{Nothing}, + FilterDescriptor, Ptr{Nothing}, ConvolutionDescriptor, + Cint, Ptr{Nothing}, Csize_t, Ptr{Nothing}, + Tensor4dDescriptor, Ptr{Nothing}), handle, alpha_ptr, src_desc, src.p, filter_desc, filter.p, conv, algo, workspace.p, workspace_size, beta_ptr, @@ -330,8 +330,8 @@ function convolution_backward_bias{T<:AbstractFloat}(handle::Handle, alpha::T, s beta::T, dest_desc::Tensor4dDescriptor, dest::CuPtr) alpha_ptr = T[alpha] beta_ptr = T[beta] - @cudnncall(:cudnnConvolutionBackwardBias, (Handle, Ptr{Void}, Tensor4dDescriptor, Ptr{Void}, - Ptr{Void}, Tensor4dDescriptor, Ptr{Void}), handle, alpha_ptr, src_desc, src.p, beta_ptr, dest_desc, dest.p) + @cudnncall(:cudnnConvolutionBackwardBias, (Handle, Ptr{Nothing}, Tensor4dDescriptor, Ptr{Nothing}, + Ptr{Nothing}, Tensor4dDescriptor, Ptr{Nothing}), handle, alpha_ptr, src_desc, src.p, beta_ptr, dest_desc, dest.p) end function convolution_backward_filter{T<:AbstractFloat}(handle::Handle, alpha::T, src_desc::Tensor4dDescriptor, src::CuPtr, 
diff_desc::Tensor4dDescriptor, diff::CuPtr, conv::ConvolutionDescriptor, @@ -346,18 +346,18 @@ function convolution_backward_filter{T<:AbstractFloat}(handle::Handle, alpha::T, @cudnncall(:cudnnConvolutionBackwardFilter, (Handle, - Ptr{Void}, # const void *alpha + Ptr{Nothing}, # const void *alpha Tensor4dDescriptor, # const cudnnTensorDescriptor_t xDesc - Ptr{Void}, # const void *x + Ptr{Nothing}, # const void *x Tensor4dDescriptor, # const cudnnTensorDescroptor_t dyDesc - Ptr{Void}, # const void *dy + Ptr{Nothing}, # const void *dy ConvolutionDescriptor, # const cudnnConvolutionDescriptor_t Cint, # cudnnConvolutionBwdFilterAlgo_t - Ptr{Void}, # void *workSpace + Ptr{Nothing}, # void *workSpace Csize_t, # size_t workSpaceSizeInBytes - Ptr{Void}, # const void *beta + Ptr{Nothing}, # const void *beta FilterDescriptor, # const cudnnFilterDescriptor_t dwDesc - Ptr{Void}), # void *dw + Ptr{Nothing}), # void *dw handle, alpha_ptr, src_desc, src.p, diff_desc, diff.p, conv, bwd_filter_algor, workspace, workspace_size, beta_ptr, grad_desc, grad.p) @@ -374,12 +374,12 @@ function convolution_backward_data{T<:AbstractFloat}(handle::Handle, alpha::T, f workspace = C_NULL workspace_size = 0 - @cudnncall(:cudnnConvolutionBackwardData, (Handle, Ptr{Void}, FilterDescriptor, Ptr{Void}, - Tensor4dDescriptor, Ptr{Void}, + @cudnncall(:cudnnConvolutionBackwardData, (Handle, Ptr{Nothing}, FilterDescriptor, Ptr{Nothing}, + Tensor4dDescriptor, Ptr{Nothing}, ConvolutionDescriptor, - Cint, Ptr{Void}, Csize_t, - Ptr{Void},Tensor4dDescriptor, - Ptr{Void}), + Cint, Ptr{Nothing}, Csize_t, + Ptr{Nothing},Tensor4dDescriptor, + Ptr{Nothing}), handle, alpha_ptr, filter_desc, filter.p, diff_desc, diff.p, conv, bwd_data_algor, workspace, workspace_size, beta_ptr, grad_desc, grad.p) @@ -398,8 +398,8 @@ function softmax_forward{T<:AbstractFloat}(handle::Handle, algorithm::Int, mode: @assert CUDNN_SOFTMAX_MODE_INSTANCE <= mode <= CUDNN_SOFTMAX_MODE_CHANNEL alpha_ptr = T[alpha] beta_ptr = T[beta] - @cudnncall(:cudnnSoftmaxForward, (Handle, Cint, Cint, Ptr{Void}, Tensor4dDescriptor, Ptr{Void}, - Ptr{Void}, Tensor4dDescriptor, Ptr{Void}), + @cudnncall(:cudnnSoftmaxForward, (Handle, Cint, Cint, Ptr{Nothing}, Tensor4dDescriptor, Ptr{Nothing}, + Ptr{Nothing}, Tensor4dDescriptor, Ptr{Nothing}), handle, algorithm, mode, alpha_ptr, src_desc, src.p, beta_ptr, dest_desc, dest.p) end @@ -410,8 +410,8 @@ function softmax_backward{T<:AbstractFloat}(handle::Handle, algorithm::Int, mode @assert CUDNN_SOFTMAX_MODE_INSTANCE <= mode <= CUDNN_SOFTMAX_MODE_CHANNEL alpha_ptr = T[alpha] beta_ptr = T[beta] - @cudnncall(:cudnnSoftmaxBackward, (Handle, Cint, Cint, Ptr{Void}, Tensor4dDescriptor, Ptr{Void}, - Tensor4dDescriptor, Ptr{Void}, Ptr{Void}, Tensor4dDescriptor, Ptr{Void}), + @cudnncall(:cudnnSoftmaxBackward, (Handle, Cint, Cint, Ptr{Nothing}, Tensor4dDescriptor, Ptr{Nothing}, + Tensor4dDescriptor, Ptr{Nothing}, Ptr{Nothing}, Tensor4dDescriptor, Ptr{Nothing}), handle, algorithm, mode, alpha_ptr, src_desc, src.p, srcdiff_desc, srcdiff.p, beta_ptr, destdiff_desc, destdiff.p) end @@ -448,9 +448,9 @@ function pooling_forward{T<:AbstractFloat}(handle::Handle, pooling::PoolingDescr dest_desc::Tensor4dDescriptor, dest::CuPtr) alpha_ptr = T[alpha] beta_ptr = T[beta] - @cudnncall(:cudnnPoolingForward, (Handle, PoolingDescriptor, Ptr{Void}, - Tensor4dDescriptor, Ptr{Void}, Ptr{Void}, - Tensor4dDescriptor, Ptr{Void}), + @cudnncall(:cudnnPoolingForward, (Handle, PoolingDescriptor, Ptr{Nothing}, + Tensor4dDescriptor, Ptr{Nothing}, Ptr{Nothing}, + 
Tensor4dDescriptor, Ptr{Nothing}), handle, pooling, alpha_ptr, src_desc, src.p, beta_ptr, dest_desc, dest.p) @@ -460,9 +460,9 @@ function pooling_backward{T<:AbstractFloat}(handle::Handle, pooling::PoolingDesc dest_desc::Tensor4dDescriptor, dest::CuPtr, beta::T, destdiff_desc::Tensor4dDescriptor, destdiff::CuPtr) alpha_ptr = T[alpha] beta_ptr = T[beta] - @cudnncall(:cudnnPoolingBackward, (Handle, PoolingDescriptor, Ptr{Void}, Tensor4dDescriptor, Ptr{Void}, - Tensor4dDescriptor, Ptr{Void},Tensor4dDescriptor, Ptr{Void}, - Ptr{Void}, Tensor4dDescriptor, Ptr{Void}), + @cudnncall(:cudnnPoolingBackward, (Handle, PoolingDescriptor, Ptr{Nothing}, Tensor4dDescriptor, Ptr{Nothing}, + Tensor4dDescriptor, Ptr{Nothing},Tensor4dDescriptor, Ptr{Nothing}, + Ptr{Nothing}, Tensor4dDescriptor, Ptr{Nothing}), handle, pooling, alpha_ptr, src_desc, src.p, srcdiff_desc, srcdiff.p, dest_desc, dest.p, beta_ptr, destdiff_desc, destdiff.p) end @@ -475,17 +475,17 @@ const CUDNN_ACTIVATION_TANH = 2 function activation_forward(handle::Handle, mode::Int, src_desc::Tensor4dDescriptor, src::CuPtr, dest_desc::Tensor4dDescriptor, dest::CuPtr) @assert CUDNN_ACTIVATION_SIGMOID <= mode <+ CUDNN_ACTIVATION_TANH - @cudnncall(:cudnnActivationForward, (Handle, Cint, Tensor4dDescriptor, Ptr{Void}, - Tensor4dDescriptor, Ptr{Void}), + @cudnncall(:cudnnActivationForward, (Handle, Cint, Tensor4dDescriptor, Ptr{Nothing}, + Tensor4dDescriptor, Ptr{Nothing}), handle, mode, src_desc, src.p, dest_desc, dest.p) end function activation_backward(handle::Handle, mode::Int, src_desc::Tensor4dDescriptor, src::CuPtr, srcdiff_desc::Tensor4dDescriptor, srcdiff::CuPtr, dest_desc::Tensor4dDescriptor, dest::CuPtr, destdiff_desc::Tensor4dDescriptor, destdiff::CuPtr) @assert CUDNN_ACTIVATION_SIGMOID <= mode <+ CUDNN_ACTIVATION_TANH - @cudnncall(:cudnnActivationBackward, (Handle, Cint, Tensor4dDescriptor, Ptr{Void}, - Tensor4dDescriptor, Ptr{Void},Tensor4dDescriptor, Ptr{Void}, - Tensor4dDescriptor, Ptr{Void}), + @cudnncall(:cudnnActivationBackward, (Handle, Cint, Tensor4dDescriptor, Ptr{Nothing}, + Tensor4dDescriptor, Ptr{Nothing},Tensor4dDescriptor, Ptr{Nothing}, + Tensor4dDescriptor, Ptr{Nothing}), handle, mode, src_desc, src.p, srcdiff_desc, srcdiff.p, dest_desc, dest.p, destdiff_desc, destdiff.p) end diff --git a/src/cuda/layers/hdf5-data.jl b/src/cuda/layers/hdf5-data.jl index a9a6307..993ffa3 100644 --- a/src/cuda/layers/hdf5-data.jl +++ b/src/cuda/layers/hdf5-data.jl @@ -1,6 +1,6 @@ function set_blob_data{T}(data::Array{T}, blob::CuTensorBlob{T}, blob_idx::Int) n_fea = get_fea_size(blob) - ptr = Base.unsafe_convert(Ptr{Void}, blob.ptr.p) + sizeof(T) * n_fea * (blob_idx-1) # note 0-based indexing in CUDA Vector - CuBLAS.set_vector(length(data), sizeof(T), convert(Ptr{Void},pointer(data)), 1, ptr, 1) + ptr = Base.unsafe_convert(Ptr{Nothing}, blob.ptr.p) + sizeof(T) * n_fea * (blob_idx-1) # note 0-based indexing in CUDA Vector + CuBLAS.set_vector(length(data), sizeof(T), convert(Ptr{Nothing},pointer(data)), 1, ptr, 1) end diff --git a/src/cuda/utils/math.jl b/src/cuda/utils/math.jl index b2690a1..f7b753e 100644 --- a/src/cuda/utils/math.jl +++ b/src/cuda/utils/math.jl @@ -13,8 +13,8 @@ for (ctype, dtype) in [(:float, Float32), (:double, Float64)] for name in [:add, :sub, :mul, :div, :div2] @eval begin function $(Symbol("$(name)!"))(backend::GPUBackend, ::Type{$dtype}, X, Y, len::Int) - X = convert(Ptr{Void},X) - Y = convert(Ptr{Void},Y) + X = convert(Ptr{Nothing},X) + Y = convert(Ptr{Nothing},Y) cuda_dim = cuda_geometry(len) kernel = 
backend.mocha.$(Symbol("elem_$(name)_$ctype")) CUDA.launch(kernel, cuda_dim..., (X, Y, len)) @@ -25,7 +25,7 @@ for (ctype, dtype) in [(:float, Float32), (:double, Float64)] # define add_scal! @eval begin function add_scal!(backend::GPUBackend, ::Type{$dtype}, X, Y, len::Int) - X = convert(Ptr{Void}, X) + X = convert(Ptr{Nothing}, X) Y = convert($dtype, Y) cuda_dim = cuda_geometry(len) kernel = backend.mocha.$(Symbol("add_scal_$ctype")) @@ -36,7 +36,7 @@ for (ctype, dtype) in [(:float, Float32), (:double, Float64)] # define log! @eval begin function log!(backend::GPUBackend, ::Type{$dtype}, X, len::Int) - X = convert(Ptr{Void}, X) + X = convert(Ptr{Nothing}, X) cuda_dim = cuda_geometry(len) kernel = backend.mocha.$(Symbol("elem_log_$ctype")) CUDA.launch(kernel, cuda_dim..., (X,len)) @@ -46,7 +46,7 @@ for (ctype, dtype) in [(:float, Float32), (:double, Float64)] # define exp! @eval begin function exp!(backend::GPUBackend, ::Type{$dtype}, X, len::Int) - X = convert(Ptr{Void}, X) + X = convert(Ptr{Nothing}, X) cuda_dim = cuda_geometry(len) kernel = backend.mocha.$(Symbol("elem_exp_$ctype")) CUDA.launch(kernel, cuda_dim..., (X,len)) @@ -56,7 +56,7 @@ for (ctype, dtype) in [(:float, Float32), (:double, Float64)] # define mul_scal! @eval begin function mul_scal!(backend::GPUBackend, ::Type{$dtype}, X, Y, len::Int) - X = convert(Ptr{Void}, X) + X = convert(Ptr{Nothing}, X) Y = convert($dtype, Y) cuda_dim = cuda_geometry(len) kernel = backend.mocha.$(Symbol("mul_scal_$ctype")) @@ -95,7 +95,7 @@ for (postfix, dt1, dt2) in [(:fi, Float32, Int), (:di, Float64, Int), (:ff, Float32, Float32), (:dd, Float64, Float64)] @eval begin function pow!(backend::GPUBackend, ::Type{$dt1}, X, Y::$dt2, len::Int) - X = convert(Ptr{Void}, X) + X = convert(Ptr{Nothing}, X) cuda_dim = cuda_geometry(len) kernel = backend.mocha.$(Symbol("elem_pow_$postfix")) CUDA.launch(kernel, cuda_dim..., (X,Y,len)) diff --git a/src/data-transformers.jl b/src/data-transformers.jl index facd762..f8ad84d 100644 --- a/src/data-transformers.jl +++ b/src/data-transformers.jl @@ -24,7 +24,7 @@ end # module DataTransformers ################################################################################ # Subtract Mean ################################################################################ -type SubMeanState <: DataTransformerState +struct SubMeanState <: DataTransformerState transformer :: DataTransformers.SubMean mean_blob :: Blob multiplier :: Blob @@ -67,7 +67,7 @@ end ################################################################################ # Scale ################################################################################ -type ScaleState{T} <: DataTransformerState +struct ScaleState{T} <: DataTransformerState transformer :: DataTransformers.Scale scale :: T end diff --git a/src/layers/hdf5-data.jl b/src/layers/hdf5-data.jl index 6418d8c..fa70276 100644 --- a/src/layers/hdf5-data.jl +++ b/src/layers/hdf5-data.jl @@ -13,7 +13,7 @@ using HDF5 ) -type HDF5DataLayerState <: LayerState +struct HDF5DataLayerState <: LayerState layer :: HDF5DataLayer blobs :: Vector{Blob} epoch :: Int diff --git a/src/layers/memory-data.jl b/src/layers/memory-data.jl index bbfe128..2fca9d8 100644 --- a/src/layers/memory-data.jl +++ b/src/layers/memory-data.jl @@ -10,7 +10,7 @@ is_source => true ) -type MemoryDataLayerState <: LayerState +struct MemoryDataLayerState <: LayerState layer :: MemoryDataLayer blobs :: Vector{Blob} epoch :: Int diff --git a/src/layers/pooling/native-impl.jl b/src/layers/pooling/native-impl.jl index 19380b0..8113764 
100644 --- a/src/layers/pooling/native-impl.jl +++ b/src/layers/pooling/native-impl.jl @@ -12,7 +12,7 @@ function max_pooling_forward(input::Array{Float32}, output::Array{Float32}, mask width, height, channels, num = size(input) pooled_width = size(output, 1) pooled_height = size(output, 2) - ccall(MAX_POOL_FWD_FLOAT, Void, + ccall(MAX_POOL_FWD_FLOAT, Nothing, (Ptr{Float32}, Ptr{Float32}, Ptr{Csize_t}, Cint, Cint, Cint, Cint, Cint, Cint, @@ -27,7 +27,7 @@ function max_pooling_forward(input::Array{Float64}, output::Array{Float64}, mask width, height, channels, num = size(input) pooled_width = size(output, 1) pooled_height = size(output, 2) - ccall(MAX_POOL_FWD_DOUBLE, Void, + ccall(MAX_POOL_FWD_DOUBLE, Nothing, (Ptr{Float64}, Ptr{Float64}, Ptr{Csize_t}, Cint, Cint, Cint, Cint, Cint, Cint, @@ -43,7 +43,7 @@ function max_pooling_backward(input::Array{Float32}, output::Array{Float32}, mas width, height, channels, num = size(input) pooled_width = size(output, 1) pooled_height = size(output, 2) - ccall(MAX_POOL_BWD_FLOAT, Void, + ccall(MAX_POOL_BWD_FLOAT, Nothing, (Ptr{Float32}, Ptr{Float32}, Ptr{Csize_t}, Cint, Cint, Cint, Cint, Cint, Cint, @@ -58,7 +58,7 @@ function max_pooling_backward(input::Array{Float64}, output::Array{Float64}, mas width, height, channels, num = size(input) pooled_width = size(output, 1) pooled_height = size(output, 2) - ccall(MAX_POOL_BWD_DOUBLE, Void, + ccall(MAX_POOL_BWD_DOUBLE, Nothing, (Ptr{Float64}, Ptr{Float64}, Ptr{Csize_t}, Cint, Cint, Cint, Cint, Cint, Cint, @@ -75,7 +75,7 @@ function mean_pooling_forward(input::Array{Float32}, output::Array{Float32}, lay pooled_width = size(output, 1) pooled_height = size(output, 2) - ccall(MEAN_POOL_FWD_FLOAT, Void, + ccall(MEAN_POOL_FWD_FLOAT, Nothing, (Ptr{Float32}, Ptr{Float32}, Cint, Cint, Cint, Cint, Cint, Cint, @@ -91,7 +91,7 @@ function mean_pooling_forward(input::Array{Float64}, output::Array{Float64}, lay pooled_width = size(output, 1) pooled_height = size(output, 2) - ccall(MEAN_POOL_FWD_DOUBLE, Void, + ccall(MEAN_POOL_FWD_DOUBLE, Nothing, (Ptr{Float64}, Ptr{Float64}, Cint, Cint, Cint, Cint, Cint, Cint, @@ -108,7 +108,7 @@ function mean_pooling_backward(input::Array{Float32}, output::Array{Float32}, la pooled_width = size(output, 1) pooled_height = size(output, 2) - ccall(MEAN_POOL_BWD_FLOAT, Void, + ccall(MEAN_POOL_BWD_FLOAT, Nothing, (Ptr{Float32}, Ptr{Float32}, Cint, Cint, Cint, Cint, Cint, Cint, @@ -124,7 +124,7 @@ function mean_pooling_backward(input::Array{Float64}, output::Array{Float64}, la pooled_width = size(output, 1) pooled_height = size(output, 2) - ccall(MEAN_POOL_BWD_DOUBLE, Void, + ccall(MEAN_POOL_BWD_DOUBLE, Nothing, (Ptr{Float64}, Ptr{Float64}, Cint, Cint, Cint, Cint, Cint, Cint, diff --git a/src/macros.jl b/src/macros.jl index 7c481fa..bfe2c46 100644 --- a/src/macros.jl +++ b/src/macros.jl @@ -36,10 +36,10 @@ macro defstruct(name, super_name, fields) @assert length(fields) > 0 name = esc(name) - field_defs = Array{Expr}(length(fields)) # :(field2 :: Int) - field_names = Array{Symbol}(length(fields)) # :field2 - field_defaults = Array{Expr}(length(fields)) # :(field2 :: Int = 0) - field_asserts = Array{Expr}(length(fields)) # :(field2 >= 0) + field_defs = Array{Expr}(undef,length(fields)) # :(field2 :: Int) + field_names = Array{Symbol}(undef,length(fields)) # :field2 + field_defaults = Array{Expr}(undef,length(fields)) # :(field2 :: Int = 0) + field_asserts = Array{Expr}(undef,length(fields)) # :(field2 >= 0) for i = 1:length(fields) field = fields[i] @@ -118,7 +118,7 @@ end # ) 
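# The two conversions running through the hunks above are mechanical 0.6 -> 1.0
# renames: a C function returning void is now declared with `Cvoid`/`Nothing`
# instead of `Void`, and uninitialized arrays need an explicit `undef` because
# the bare `Array{T}(n)` constructor was removed. A minimal sketch, not taken
# from the patch (the `:free` call and the array are illustrative only):
exprs = Array{Expr}(undef, 4)                    # 0.6 spelling was Array{Expr}(4)
ccall(:free, Nothing, (Ptr{Nothing},), C_NULL)   # 0.6 spelling was Void / Ptr{Void}; free(NULL) is a no-op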
############################################################# macro characterize_layer(layer, properties...) - defs = Array{Expr}(length(properties)) + defs = Array{Expr}(undef,length(properties)) for (i,prop) in enumerate(properties) prop_name, prop_val = parse_property(prop) defs[i] = quote diff --git a/src/neurons.jl b/src/neurons.jl index 332f606..aef4247 100644 --- a/src/neurons.jl +++ b/src/neurons.jl @@ -26,31 +26,31 @@ export forward, backward # messy namespace ############################################################ module Neurons -using ..Mocha.ActivationFunction +using ..Mocha: ActivationFunction # Identity -type Identity <: ActivationFunction +struct Identity <: ActivationFunction end # Rectified-Linear: ReLU(eps)(x) = max(x,eps) -type ReLU <: ActivationFunction +struct ReLU <: ActivationFunction epsilon::Float64 # optional floor value, default zero end ReLU() = ReLU(0.0) # Exponential: Exponential(x) = exp(x) -type Exponential <: ActivationFunction +struct Exponential <: ActivationFunction end # Leaky Rectified-Linear: LReLU(x) = x > 0 ? x : 0.01x -type LReLU <: ActivationFunction +struct LReLU <: ActivationFunction end # Sigmoid: Sigmoid(x) = 1 / (1 + exp(-x)) -type Sigmoid <: ActivationFunction +struct Sigmoid <: ActivationFunction end # Sigmoid: Tanh(x) = (1 + exp(-2x)) / (1 + exp(-2x)) -type Tanh <: ActivationFunction +struct Tanh <: ActivationFunction end end # module Neurons diff --git a/src/parameter.jl b/src/parameter.jl index 1714082..fa55b42 100644 --- a/src/parameter.jl +++ b/src/parameter.jl @@ -1,7 +1,7 @@ export Parameter export make_parameter, share_parameter -type Parameter <: AbstractParameter +struct Parameter <: AbstractParameter name :: AbstractString blob :: Blob gradient :: Blob @@ -17,8 +17,8 @@ end Parameter(name,blob,gradient,initializer,regularizer,constraint,lr) = Parameter(name, blob, gradient, initializer, regularizer, constraint, lr, RefCounter(1)) -function make_parameter{N}(backend::Backend, name::AbstractString, data_type::Type, dims::NTuple{N,Int}, - init::Initializer, regu::Regularizer, cons::Constraint, lr::AbstractFloat) +function make_parameter(backend::Backend, name::AbstractString, data_type::Type, dims::NTuple{N,Int}, + init::Initializer, regu::Regularizer, cons::Constraint, lr::AbstractFloat) where {N} blob = make_blob(backend, data_type, dims) grad = make_blob(backend, data_type, dims) diff --git a/src/pooling-functions.jl b/src/pooling-functions.jl index b967616..63ea68f 100644 --- a/src/pooling-functions.jl +++ b/src/pooling-functions.jl @@ -3,9 +3,9 @@ export PoolingFunction, Pooling @compat abstract type StdPoolingFunction <: PoolingFunction end # built-in poolings module Pooling -using ..Mocha.StdPoolingFunction +using ..Mocha: StdPoolingFunction -type Max <: StdPoolingFunction end -type Mean <: StdPoolingFunction end +struct Max <: StdPoolingFunction end +struct Mean <: StdPoolingFunction end end # module Pooling diff --git a/src/solvers.jl b/src/solvers.jl index 07f4670..d650491 100644 --- a/src/solvers.jl +++ b/src/solvers.jl @@ -16,7 +16,7 @@ struct Solver{T<:SolverMethod} coffee_lounge :: Any # forward declaration end -Solver{T}(method::T, params::SolverParameters) = begin +Solver(method::T, params::SolverParameters) where {T} = begin validate_parameters(method, params) Solver(method, params, CoffeeLounge()) end diff --git a/src/utils/im2col-native.jl b/src/utils/im2col-native.jl index 13f7b10..56537da 100644 --- a/src/utils/im2col-native.jl +++ b/src/utils/im2col-native.jl @@ -18,7 +18,7 @@ end function 
im2col_impl(img::Ptr{Float32}, col::Ptr{Float32}, width::Int, height::Int, channels::Int, kernel_w::Int, kernel_h::Int, pad_w::Int, pad_h::Int, stride_w::Int, stride_h::Int) - ccall(IM2COL_FLOAT_HANDLE, Void, (Ptr{Float32}, Ptr{Float32}, Cint, Cint, Cint, + ccall(IM2COL_FLOAT_HANDLE, Nothing, (Ptr{Float32}, Ptr{Float32}, Cint, Cint, Cint, Cint, Cint, # kernel Cint, Cint, # pad Cint, Cint, # stride @@ -27,7 +27,7 @@ end function im2col_impl(img::Ptr{Float64}, col::Ptr{Float64}, width::Int, height::Int, channels::Int, kernel_w::Int, kernel_h::Int, pad_w::Int, pad_h::Int, stride_w::Int, stride_h::Int) - ccall(IM2COL_DOUBLE_HANDLE, Void, (Ptr{Float64}, Ptr{Float64}, Cint, Cint, Cint, + ccall(IM2COL_DOUBLE_HANDLE, Nothing, (Ptr{Float64}, Ptr{Float64}, Cint, Cint, Cint, Cint, Cint, # kernel Cint, Cint, # pad Cint, Cint, # stride @@ -51,7 +51,7 @@ end function col2im_impl(col::Ptr{Float32}, img::Ptr{Float32}, width::Int, height::Int, channels::Int, kernel_w::Int, kernel_h::Int, pad_w::Int, pad_h::Int, stride_w::Int, stride_h::Int) - ccall(COL2IM_FLOAT_HANDLE, Void, (Ptr{Float32}, Ptr{Float32}, Cint, Cint, Cint, + ccall(COL2IM_FLOAT_HANDLE, Nothing, (Ptr{Float32}, Ptr{Float32}, Cint, Cint, Cint, Cint, Cint, # kernel Cint, Cint, # pad Cint, Cint, # stride @@ -60,7 +60,7 @@ end function col2im_impl(col::Ptr{Float64}, img::Ptr{Float64}, width::Int, height::Int, channels::Int, kernel_w::Int, kernel_h::Int, pad_w::Int, pad_h::Int, stride_w::Int, stride_h::Int) - ccall(COL2IM_DOUBLE_HANDLE, Void, (Ptr{Float64}, Ptr{Float64}, Cint, Cint, Cint, + ccall(COL2IM_DOUBLE_HANDLE, Nothing, (Ptr{Float64}, Ptr{Float64}, Cint, Cint, Cint, Cint, Cint, # kernel Cint, Cint, # pad Cint, Cint, # stride diff --git a/src/utils/im2col.jl b/src/utils/im2col.jl index 93a4224..ccd65ac 100644 --- a/src/utils/im2col.jl +++ b/src/utils/im2col.jl @@ -1,11 +1,11 @@ -function im2col{T}(img::Array{T}, n::Int, col::Array{T}, width::Int, height::Int, channels::Int, - kernel::NTuple{2,Int}, pad::NTuple{2,Int}, stride::NTuple{2,Int}) +function im2col(img::Array{T}, n::Int, col::Array{T}, width::Int, height::Int, channels::Int, + kernel::NTuple{2,Int}, pad::NTuple{2,Int}, stride::NTuple{2,Int}) where {T} im2col_impl(img[:,:,:,n], col, width, height, channels, kernel, pad, stride) end -function im2col_impl{T}(img::Array{T}, col::Array{T}, width::Int, height::Int, channels::Int, - kernel::NTuple{2,Int}, pad::NTuple{2,Int}, stride::NTuple{2,Int}) +function im2col_impl(img::Array{T}, col::Array{T}, width::Int, height::Int, channels::Int, + kernel::NTuple{2,Int}, pad::NTuple{2,Int}, stride::NTuple{2,Int}) where {T} kernel_w, kernel_h = kernel pad_w, pad_h = pad @@ -34,15 +34,15 @@ function im2col_impl{T}(img::Array{T}, col::Array{T}, width::Int, height::Int, c end end -function col2im{T}(col::Array{T}, img::Array{T}, n::Int, img_buf::Array{T}, width::Int, height::Int, channels::Int, - kernel::NTuple{2,Int}, pad::NTuple{2,Int}, stride::NTuple{2,Int}) +function col2im(col::Array{T}, img::Array{T}, n::Int, img_buf::Array{T}, width::Int, height::Int, channels::Int, + kernel::NTuple{2,Int}, pad::NTuple{2,Int}, stride::NTuple{2,Int}) where {T} col2im_impl(col, img_buf, width, height, channels, kernel, pad, stride) img[:,:,:,n] = img_buf end -function col2im_impl{T}(col::Array{T}, img::Array{T}, width::Int, height::Int, channels::Int, - kernel::NTuple{2,Int}, pad::NTuple{2,Int}, stride::NTuple{2,Int}) +function col2im_impl(col::Array{T}, img::Array{T}, width::Int, height::Int, channels::Int, + kernel::NTuple{2,Int}, pad::NTuple{2,Int}, 
stride::NTuple{2,Int}) where {T} kernel_w, kernel_h = kernel pad_w, pad_h = pad diff --git a/src/utils/ref-count.jl b/src/utils/ref-count.jl index fe37d2a..7de21e4 100644 --- a/src/utils/ref-count.jl +++ b/src/utils/ref-count.jl @@ -5,7 +5,7 @@ import Base.dec export RefCounter, inc, dec, ref -type RefCounter +struct RefCounter count :: Int end diff --git a/test/layers/lrn.jl b/test/layers/lrn.jl index 31c26b9..db5cf65 100644 --- a/test/layers/lrn.jl +++ b/test/layers/lrn.jl @@ -35,7 +35,7 @@ function test_lrn_layer(backend::Backend, mode::LRNModeType, tensor_dim, T, eps) shutdown(backend, state) end -function lrn_forward_across_channel{T}(input::Array{T}, state, op_dim) +function lrn_forward_across_channel(input::Array{T}, state, op_dim) where {T} output = similar(input) pre_dim, chann_dim, post_dim = split_dims(input, op_dim) pre_pad = div(state.layer.kernel-1,2) @@ -58,7 +58,7 @@ function lrn_forward_across_channel{T}(input::Array{T}, state, op_dim) return output end -function lrn_forward_within_channel{T}(input::Array{T}, state) +function lrn_forward_within_channel(input::Array{T}, state) where {T} output = similar(input) width, height, channels, num = size(input) pooled_width = width; pooled_height = height @@ -86,7 +86,7 @@ function lrn_forward_within_channel{T}(input::Array{T}, state) return output end -function lrn_forward{T}(input::Array{T}, state, op_dim) +function lrn_forward(input::Array{T}, state, op_dim) where {T} if isa(state.layer.mode, LRNMode.AcrossChannel) lrn_forward_across_channel(input, state, op_dim) elseif isa(state.layer.mode, LRNMode.WithinChannel) @@ -96,7 +96,7 @@ function lrn_forward{T}(input::Array{T}, state, op_dim) end end -function lrn_backward_across_channel{T}(input::Array{T}, top_diff::Array{T}, state, op_dim) +function lrn_backward_across_channel(input::Array{T}, top_diff::Array{T}, state, op_dim) where {T} output = zeros(T, size(input)) pre_dim, chann_dim, post_dim = split_dims(input, op_dim) pre_pad = div(state.layer.kernel-1,2) @@ -126,7 +126,7 @@ function lrn_backward_across_channel{T}(input::Array{T}, top_diff::Array{T}, sta return output end -function lrn_backward_within_channel{T}(input::Array{T}, top_diff::Array{T}, state) +function lrn_backward_within_channel(input::Array{T}, top_diff::Array{T}, state) where {T} output = zeros(T, size(input)) width, height, channels, num = size(input) pooled_width = width; pooled_height = height @@ -160,7 +160,7 @@ function lrn_backward_within_channel{T}(input::Array{T}, top_diff::Array{T}, sta return output end -function lrn_backward{T}(input::Array{T}, top_diff::Array{T}, state, op_dim) +function lrn_backward(input::Array{T}, top_diff::Array{T}, state, op_dim) where {T} if isa(state.layer.mode, LRNMode.AcrossChannel) lrn_backward_across_channel(input, top_diff, state, op_dim) elseif isa(state.layer.mode, LRNMode.WithinChannel) diff --git a/test/layers/random-normal.jl b/test/layers/random-normal.jl index 0af18ce..f90dfcc 100644 --- a/test/layers/random-normal.jl +++ b/test/layers/random-normal.jl @@ -21,7 +21,7 @@ function test_random_normal_layer(backend::Backend, T, eps) eltype=T, batch_sizes=batch_sizes) state = setup(backend, layer, Blob[], Blob[]) - layer_data = [Array{T}(tuple(output_dims..., batch_sizes[i])) + layer_data = [Array(tuple(output_dims..., batch_sizes[i])) where {T} for i in 1:N] forward(backend, state, Blob[]) @@ -36,7 +36,7 @@ function test_random_normal_layer(backend::Backend, T, eps) # output should be different on subsequent calls - layer_data2 = [Array{T}(tuple(output_dims..., 
batch_sizes[i])) + layer_data2 = [Array(tuple(output_dims..., batch_sizes[i])) where {T} for i in 1:N] forward(backend, state, Blob[]) From f01bd5dcf2ab94bbb80b8fecac64040fe032dc06 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Tue, 20 Nov 2018 01:18:52 -0500 Subject: [PATCH 08/24] runs in 1.0.2, but imagenet-classifier example causes segfault --- Project.toml | 1 + benchmarks/native-im2col/im2col-bm.jl | 2 +- docs/dev-guide/layer.rst | 2 +- docs/tutorial/mnist.rst | 24 +++++++++---------- .../ijulia/ilsvrc12/imagenet-classifier.ipynb | 2 +- examples/mnist/VAE.jl | 2 +- examples/mnist/mnist-demo.jl | 2 +- examples/mnist/mnist_learning_curve.jl | 10 ++++---- src/Mocha.jl | 3 ++- src/blob.jl | 7 +++--- src/coffee-break.jl | 4 ++-- src/coffee/snapshot.jl | 4 ++-- src/coffee/training-summary.jl | 2 +- src/coffee/validation-performance.jl | 2 +- src/compatibility.jl | 1 - src/cuda/backend.jl | 4 ++-- src/cuda/cuda.jl | 2 +- src/cuda/layers/convolution.jl | 2 +- src/cuda/layers/pooling.jl | 2 +- src/cuda/layers/softmax.jl | 2 +- src/layers/accuracy.jl | 2 +- src/layers/argmax.jl | 2 +- src/layers/async-hdf5-data.jl | 2 +- src/layers/binary-accuracy.jl | 2 +- src/layers/binary-cross-entropy-loss.jl | 2 +- src/layers/channel-pooling.jl | 2 +- src/layers/concat.jl | 2 +- src/layers/convolution.jl | 4 ++-- src/layers/crop.jl | 8 +++---- src/layers/dropout.jl | 7 +++--- src/layers/element-wise.jl | 15 ++++++------ src/layers/gaussian-kl-loss.jl | 2 +- src/layers/hdf5-output.jl | 2 +- src/layers/hinge-loss.jl | 12 +++++----- src/layers/identity.jl | 2 +- src/layers/index2onehot.jl | 2 +- src/layers/inner-product.jl | 2 +- src/layers/lrn.jl | 6 ++--- src/layers/memory-output.jl | 2 +- src/layers/multinomial-logistic-loss.jl | 5 ++-- src/layers/pooling.jl | 2 +- src/layers/pooling/channel-pooling.jl | 9 ++++--- src/layers/pooling/julia-impl.jl | 9 ++++--- src/layers/power.jl | 2 +- src/layers/random-mask.jl | 2 +- src/layers/random-normal.jl | 2 +- src/layers/reshape.jl | 2 +- src/layers/softlabel-softmax-loss.jl | 2 +- src/layers/softmax-loss.jl | 5 ++-- src/layers/softmax.jl | 2 +- src/layers/split.jl | 5 ++-- src/layers/square-loss.jl | 2 +- src/layers/tied-inner-product.jl | 2 +- src/layers/wasserstein-loss.jl | 2 +- src/net.jl | 2 +- src/solvers.jl | 8 +++---- src/solvers/adadelta.jl | 4 ++-- src/solvers/adagrad.jl | 4 ++-- src/solvers/adam.jl | 4 ++-- src/solvers/nesterov.jl | 4 ++-- src/solvers/policies.jl | 23 +++++++++--------- src/solvers/sgd.jl | 4 ++-- src/utils/gradient-checking.jl | 2 +- test/utils/ref-count.jl | 2 +- tools/image-classifier.jl | 2 +- 65 files changed, 132 insertions(+), 138 deletions(-) diff --git a/Project.toml b/Project.toml index c93c7e6..43b916f 100644 --- a/Project.toml +++ b/Project.toml @@ -8,3 +8,4 @@ JLD = "4138dd39-2aa7-5051-a626-17a0bb65d9c8" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Memento = "f28f55f0-a522-5efc-85c2-fe41dfb9b2d9" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" diff --git a/benchmarks/native-im2col/im2col-bm.jl b/benchmarks/native-im2col/im2col-bm.jl index c78e3a3..2cb06fd 100644 --- a/benchmarks/native-im2col/im2col-bm.jl +++ b/benchmarks/native-im2col/im2col-bm.jl @@ -19,7 +19,7 @@ using Benchmark # | 2 | "im2col_c" | 0.00514862 | 6.19335 | 50 | ################################################################################ -function im2col{T}(img::Array{T}, col::Array{T}, width::Int, height::Int, channels::Int, kernel::NTuple{2,Int}, pad::NTuple{2,Int}, 
stride::NTuple{2,Int}) +function im2col(img::Array{T}, col::Array{T}, width::Int, height::Int, channels::Int, kernel::NTuple{2,Int}, pad::NTuple{2,Int}, stride::NTuple{2,Int}) where {T} kernel_w, kernel_h = kernel pad_w, pad_h = pad stride_w, stride_h = stride diff --git a/docs/dev-guide/layer.rst b/docs/dev-guide/layer.rst index 90e3327..98b6af1 100644 --- a/docs/dev-guide/layer.rst +++ b/docs/dev-guide/layer.rst @@ -63,7 +63,7 @@ For example .. code-block:: julia - type PoolingLayerState <: LayerState + struct PoolingLayerState <: LayerState layer :: PoolingLayer blobs :: Vector{Blob} blobs_diff :: Vector{Blob} diff --git a/docs/tutorial/mnist.rst b/docs/tutorial/mnist.rst index d026259..8c19714 100644 --- a/docs/tutorial/mnist.rst +++ b/docs/tutorial/mnist.rst @@ -391,19 +391,19 @@ Using Saved Snapshots for Prediction Often you want to use a network previously trained with Mocha to make individual predictions. Earlier during the training process snapshots of the network state were saved every 5000 iterations, and these can be reloaded at a later time. To do this we first need a network with the same shape and configuration as the one used for training, except instead we supply a ``MemoryDataLayer`` instead of a ``HDF5DataLayer``, and a ``SoftmaxLayer`` instead of a ``SoftmaxLossLayer``: .. code-block:: julia - + using Mocha backend = CPUBackend() init(backend) - + mem_data = MemoryDataLayer(name="data", tops=[:data], batch_size=1, data=Array[zeros(Float32, 28, 28, 1, 1)]) softmax_layer = SoftmaxLayer(name="prob", tops=[:prob], bottoms=[:ip2]) # define common_layers as earlier - + run_net = Net("imagenet", backend, [mem_data, common_layers..., softmax_layer]) - + Note that ``common_layers`` has the same definition as above, and that we specifically pass a ``Float32`` array to the ``MemoryDataLayer`` so that it will match the ``Float32`` data type used in the MNIST HDF5 training dataset. Next we fill in this network with the learned parameters from the final training snapshot: .. code-block:: julia @@ -424,7 +424,7 @@ Now we are ready to make predictions using our trained model. A simple way to ac println() println("Label probability vector:") println(run_net.output_blobs[:prob].data) - + This produces the output: .. code-block:: text @@ -465,7 +465,7 @@ Given this data we can write a new Julia script to read the ``statistics.jld`` f In order to see the plot we need to use a plotting package. The PyPlot package that implements matplotlib for Julia is adequate for this. Use the standard ``Pkg.add("PyPlot")`` if you do not already have it. We will also need to load the ``statistics.jld`` file using Julia's implementation of the HDF5 format which requires the JLD packge. .. code-block:: julia - + using PyPlot, JLD Next, we need to load the data. This is not difficult, but requires some careful handling because the ``statistics.jld`` file is a Julia Dict that includes several sub-dictionaries. You may need to adjust the path in the ``load("snapshots/statistics.jld")`` command so that it accurately reflects the path from where the code is running to the ``snapshots`` directory. @@ -503,16 +503,16 @@ In pure gradient descent the solution moves closer to a minima each and every st .. 
code-block:: julia - function low_pass{T <: Real}(x::Vector{T}, window::Int) + function low_pass(x::Vector{T}, window::Int) where {T <: Real} len = length(x) y = Vector{Float64}(len) for i in 1:len # I want the mean of the first i terms up to width of window - # Putting some numbers to this with window 4 + # Putting some numbers to this with window 4 # i win lo hi - # 1 4 1 1 - # 2 4 1 2 - # 3 4 1 3 + # 1 4 1 1 + # 2 4 1 2 + # 3 4 1 3 # 4 4 1 4 # 5 4 1 5 # 6 4 2 6 => window starts to slide @@ -538,5 +538,3 @@ We declare ``window`` to be about one-quarter the length of the input to enforce There are lots of great resources on the web for building and training neural networks and after this example you now know how to use Julia and Mocha to contruct, train, and validate one of the most famous convolutional neural networks. **Thank you for working all the way to the end of the MNIST tutorial!** - - diff --git a/examples/ijulia/ilsvrc12/imagenet-classifier.ipynb b/examples/ijulia/ilsvrc12/imagenet-classifier.ipynb index 5345197..d7733e6 100644 --- a/examples/ijulia/ilsvrc12/imagenet-classifier.ipynb +++ b/examples/ijulia/ilsvrc12/imagenet-classifier.ipynb @@ -2955,7 +2955,7 @@ "x(c.node.ownerSVGElement)||a.select(\"svg\")}function b(c,a,m){function b(c){if(null==c)return aa;if(c==+c)return c;v(B,{width:c});try{return B.getBBox().width}catch(a){return 0}}function h(c){if(null==c)return aa;if(c==+c)return c;v(B,{height:c});try{return B.getBBox().height}catch(a){return 0}}function e(b,B){null==a?d[b]=B(c.attr(b)||0):b==a&&(d=B(null==m?c.attr(b)||0:m))}var f=p(c).node,d={},B=f.querySelector(\".svg---mgr\");B||(B=v(\"rect\"),v(B,{x:-9E9,y:-9E9,width:10,height:10,\"class\":\"svg---mgr\",\r\n", "fill:\"none\"}),f.appendChild(B));switch(c.type){case \"rect\":e(\"rx\",b),e(\"ry\",h);case \"image\":e(\"width\",b),e(\"height\",h);case \"text\":e(\"x\",b);e(\"y\",h);break;case \"circle\":e(\"cx\",b);e(\"cy\",h);e(\"r\",b);break;case \"ellipse\":e(\"cx\",b);e(\"cy\",h);e(\"rx\",b);e(\"ry\",h);break;case \"line\":e(\"x1\",b);e(\"x2\",b);e(\"y1\",h);e(\"y2\",h);break;case \"marker\":e(\"refX\",b);e(\"markerWidth\",b);e(\"refY\",h);e(\"markerHeight\",h);break;case \"radialGradient\":e(\"fx\",b);e(\"fy\",h);break;case \"tspan\":e(\"dx\",b);e(\"dy\",h);break;default:e(a,\r\n", "b)}f.removeChild(B);return d}function q(c){y(c,\"array\")||(c=Array.prototype.slice.call(arguments,0));for(var a=0,b=0,m=this.node;this[a];)delete this[a++];for(a=0;a window starts to slide diff --git a/src/Mocha.jl b/src/Mocha.jl index ed0d486..12edf6e 100644 --- a/src/Mocha.jl +++ b/src/Mocha.jl @@ -1,6 +1,7 @@ module Mocha -using Memento, Printf +using Memento +using Printf, Random const logger = getlogger(Mocha) diff --git a/src/blob.jl b/src/blob.jl index 3a55c7b..0bec094 100644 --- a/src/blob.jl +++ b/src/blob.jl @@ -1,7 +1,8 @@ export Blob export CPUBlob, NullBlob -import Base: eltype, size, length, ndims, copy!, fill!, show, randn! +import Base: eltype, size, length, ndims, copy!, fill!, show +import Random: randn! 
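# Context for the import change just above: in 0.7/1.0 `randn!` moved from Base
# into the Random standard library, so extending it for Blob (as this file does
# a few lines below) requires importing it from Random; otherwise the definition
# would create an unrelated local function. A minimal sketch with a hypothetical
# wrapper type `Buf`, not part of Mocha:
import Random: randn!
struct Buf
    data::Vector{Float64}
end
randn!(b::Buf) = (randn!(b.data); b)   # adds a method to Random.randn!
randn!(Buf(zeros(4)))                  # fills data with standard-normal samples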
export eltype, size, length, ndims, copy!, fill!, erase!, show export get_num, get_height, get_width, get_fea_size, to_array export make_blob, make_zero_blob, reshape_blob @@ -87,7 +88,7 @@ function randn!(dst :: Blob) # should fill dst with iid standard normal variates end ############################################################ -# A Dummy Blob type holding nothing +# A Dummy Blob struct holding nothing ############################################################ struct NullBlob <: Blob{Nothing, 0} end @@ -128,7 +129,7 @@ end struct CPUBlob{T <: AbstractFloat, N} <: Blob{T, N} data :: AbstractArray{T, N} end -CPUBlob(t :: Type, dims::NTuple{N,Int}) where {N} = CPUBlob(Array{t}(dims)) +CPUBlob(t :: Type, dims::NTuple{N,Int}) where {N} = CPUBlob(Array{t}(dims)) function make_blob(backend::CPUBackend, data_type::Type, dims::NTuple{N,Int}) where {N} return CPUBlob(data_type, dims) diff --git a/src/coffee-break.jl b/src/coffee-break.jl index 4e5abe1..45e1332 100644 --- a/src/coffee-break.jl +++ b/src/coffee-break.jl @@ -10,7 +10,7 @@ function init(::Coffee, ::Net) end function enjoy(::Any, ::Coffee, ::Net, ::SolverState) end function destroy(::Coffee, ::Net) end -type CoffeeBreak +struct CoffeeBreak coffee :: Coffee every_n_iter :: Int every_n_epoch :: Int @@ -38,7 +38,7 @@ using HDF5, JLD const StatisticsValue = AbstractFloat const StatisticsRecords = Dict{Int, StatisticsValue} -type CoffeeLounge +struct CoffeeLounge filename :: AbstractString save_every_n_iter :: Int file_exists :: Symbol # :overwrite, :panic, :merge diff --git a/src/coffee/snapshot.jl b/src/coffee/snapshot.jl index 5f5a617..353f5ad 100644 --- a/src/coffee/snapshot.jl +++ b/src/coffee/snapshot.jl @@ -2,7 +2,7 @@ using HDF5, JLD export Snapshot -type Snapshot <: Coffee +struct Snapshot <: Coffee dir :: AbstractString end @@ -17,7 +17,7 @@ end const SOLVER_STATE_KEY = "solver_state" -function enjoy{T<:InternalSolverState}(lounge::CoffeeLounge, coffee::Snapshot, net::Net, state::SolverState{T}) +function enjoy(lounge::CoffeeLounge, coffee::Snapshot, net::Net, state::SolverState{T}) where {T<:InternalSolverState} fn = @sprintf("snapshot-%06d.jld", state.iter) m_info("Saving snapshot to $fn...") path = joinpath(coffee.dir, fn) diff --git a/src/coffee/training-summary.jl b/src/coffee/training-summary.jl index f1f4ac2..89ca0c2 100644 --- a/src/coffee/training-summary.jl +++ b/src/coffee/training-summary.jl @@ -1,6 +1,6 @@ export TrainingSummary -type TrainingSummary <: Coffee +struct TrainingSummary <: Coffee statistic_names :: Vector{Any} #Default Constructor diff --git a/src/coffee/validation-performance.jl b/src/coffee/validation-performance.jl index d5e0f7d..0b1c412 100644 --- a/src/coffee/validation-performance.jl +++ b/src/coffee/validation-performance.jl @@ -1,7 +1,7 @@ export ValidationPerformance export register -type ValidationPerformance <: Coffee +struct ValidationPerformance <: Coffee validation_net :: Net ValidationPerformance(net::Net) = new(net, Function[]) diff --git a/src/compatibility.jl b/src/compatibility.jl index 99bcfcc..15a0630 100644 --- a/src/compatibility.jl +++ b/src/compatibility.jl @@ -24,4 +24,3 @@ else return fnc_name end end -export blasfunc \ No newline at end of file diff --git a/src/cuda/backend.jl b/src/cuda/backend.jl index e6bdefb..7c6b675 100644 --- a/src/cuda/backend.jl +++ b/src/cuda/backend.jl @@ -13,7 +13,7 @@ macro defkernels(kernels...) field_init_block = Expr(:block, field_inits...) 
esc(quote - type MochaKernels + struct MochaKernels mod :: CUDA.CuModule $type_body @@ -144,7 +144,7 @@ function shutdown(mocha :: MochaKernels) CUDA.unload(mocha.mod) end -type GPUBackend <: AbstractGPUBackend +struct GPUBackend <: AbstractGPUBackend param_registry :: ParameterRegistry initialized :: Bool cu_ctx :: CUDA.CuContext diff --git a/src/cuda/cuda.jl b/src/cuda/cuda.jl index c84077b..3316898 100644 --- a/src/cuda/cuda.jl +++ b/src/cuda/cuda.jl @@ -144,7 +144,7 @@ end ############################################################ const CUdeviceptr = Ptr{Nothing} -type CuPtr +struct CuPtr p::CUdeviceptr CuPtr() = new(convert(CUdeviceptr, 0)) diff --git a/src/cuda/layers/convolution.jl b/src/cuda/layers/convolution.jl index 92143f7..8ef6d1e 100644 --- a/src/cuda/layers/convolution.jl +++ b/src/cuda/layers/convolution.jl @@ -1,4 +1,4 @@ -type CuDNNConvState +struct CuDNNConvState inputs_desc :: Vector{CuDNN.Tensor4dDescriptor} outputs_desc :: Vector{CuDNN.Tensor4dDescriptor} conv_desc :: Vector{CuDNN.ConvolutionDescriptor} diff --git a/src/cuda/layers/pooling.jl b/src/cuda/layers/pooling.jl index 72b00f1..511397a 100644 --- a/src/cuda/layers/pooling.jl +++ b/src/cuda/layers/pooling.jl @@ -1,4 +1,4 @@ -type CuDNNPoolingState +struct CuDNNPoolingState pooling_desc :: CuDNN.PoolingDescriptor inputs_desc :: Vector{CuDNN.Tensor4dDescriptor} outputs_desc :: Vector{CuDNN.Tensor4dDescriptor} diff --git a/src/cuda/layers/softmax.jl b/src/cuda/layers/softmax.jl index 167a2fa..7177411 100644 --- a/src/cuda/layers/softmax.jl +++ b/src/cuda/layers/softmax.jl @@ -1,4 +1,4 @@ -type CuDNNSoftmaxState +struct CuDNNSoftmaxState inputs_desc :: Vector{CuDNN.Tensor4dDescriptor} outputs_desc :: Vector{CuDNN.Tensor4dDescriptor} end diff --git a/src/layers/accuracy.jl b/src/layers/accuracy.jl index 93b2372..3f414c1 100644 --- a/src/layers/accuracy.jl +++ b/src/layers/accuracy.jl @@ -9,7 +9,7 @@ has_stats => true, ) -type AccuracyLayerState <: LayerState +struct AccuracyLayerState <: LayerState layer :: AccuracyLayer op_dim :: Int diff --git a/src/layers/argmax.jl b/src/layers/argmax.jl index 343d783..d157f05 100644 --- a/src/layers/argmax.jl +++ b/src/layers/argmax.jl @@ -5,7 +5,7 @@ (bottoms :: Vector{Symbol} = Symbol[], length(bottoms) == length(tops)), ) -type ArgmaxLayerState <: LayerState +struct ArgmaxLayerState <: LayerState layer :: ArgmaxLayer blobs :: Vector{Blob} diff --git a/src/layers/async-hdf5-data.jl b/src/layers/async-hdf5-data.jl index 244f3dd..d576732 100644 --- a/src/layers/async-hdf5-data.jl +++ b/src/layers/async-hdf5-data.jl @@ -15,7 +15,7 @@ using HDF5 const AsyncCommsType = @static VERSION < v"0.6-" ? 
Task : Channel{Any} -type AsyncHDF5DataLayerState <: LayerState +struct AsyncHDF5DataLayerState <: LayerState layer :: AsyncHDF5DataLayer blobs :: Vector{Blob} epoch :: Int diff --git a/src/layers/binary-accuracy.jl b/src/layers/binary-accuracy.jl index ceaacc1..9c08a22 100644 --- a/src/layers/binary-accuracy.jl +++ b/src/layers/binary-accuracy.jl @@ -9,7 +9,7 @@ has_stats => true, ) -type BinaryAccuracyLayerState <: LayerState +struct BinaryAccuracyLayerState <: LayerState layer :: BinaryAccuracyLayer accuracy :: Float64 diff --git a/src/layers/binary-cross-entropy-loss.jl b/src/layers/binary-cross-entropy-loss.jl index 7442714..dc9f1d9 100644 --- a/src/layers/binary-cross-entropy-loss.jl +++ b/src/layers/binary-cross-entropy-loss.jl @@ -12,7 +12,7 @@ can_do_bp => true, ) -type BinaryCrossEntropyLossLayerState{T} <: LayerState +struct BinaryCrossEntropyLossLayerState{T} <: LayerState layer :: BinaryCrossEntropyLossLayer loss :: T end diff --git a/src/layers/channel-pooling.jl b/src/layers/channel-pooling.jl index 4087192..20a1e53 100644 --- a/src/layers/channel-pooling.jl +++ b/src/layers/channel-pooling.jl @@ -12,7 +12,7 @@ can_do_bp => true, ) -type ChannelPoolingLayerState <: LayerState +struct ChannelPoolingLayerState <: LayerState layer :: ChannelPoolingLayer blobs :: Vector{Blob} blobs_diff :: Vector{Blob} diff --git a/src/layers/concat.jl b/src/layers/concat.jl index 814ccbe..dde5dfc 100644 --- a/src/layers/concat.jl +++ b/src/layers/concat.jl @@ -8,7 +8,7 @@ can_do_bp => true ) -type ConcatLayerState <: LayerState +struct ConcatLayerState <: LayerState layer :: ConcatLayer blobs :: Vector{Blob} blobs_diff :: Vector{Blob} diff --git a/src/layers/convolution.jl b/src/layers/convolution.jl index 063ac87..cdaa83b 100644 --- a/src/layers/convolution.jl +++ b/src/layers/convolution.jl @@ -24,7 +24,7 @@ can_do_bp => true ) -type CPUConvState +struct CPUConvState col_buffer :: Blob M :: Int N :: Int @@ -55,7 +55,7 @@ function setup_etc(backend::CPUBackend, layer::ConvolutionLayer, dtype, width, h return etc end -type ConvolutionLayerState <: LayerState +struct ConvolutionLayerState <: LayerState layer :: ConvolutionLayer blobs :: Vector{Blob} blobs_diff :: Vector{Blob} diff --git a/src/layers/crop.jl b/src/layers/crop.jl index 8cf5fa2..14f463f 100644 --- a/src/layers/crop.jl +++ b/src/layers/crop.jl @@ -7,7 +7,7 @@ (tops :: Vector{Symbol} = [], length(tops) == length(bottoms)) ) -type CropLayerState <: LayerState +struct CropLayerState <: LayerState layer :: CropLayer blobs :: Vector{Blob} end @@ -18,7 +18,7 @@ function setup(backend::Backend, layer::CropLayer, inputs::Vector{Blob}, diffs:: @assert isa(diffs[i], NullBlob) # Back-propagation for crop-layer is not implemented end - blobs = Array{Blob}(length(inputs)) + blobs = Array(length(inputs)) where {Blob} for i = 1:length(inputs) width, height, channels, num = size(inputs[i]) @assert layer.crop_size[1] <= width && layer.crop_size[2] <= height @@ -33,7 +33,7 @@ function shutdown(backend::Backend, state::CropLayerState) map(destroy, state.blobs) end -function crop_blob{T}(input::Array{T}, output::Array{T}, crop_size::NTuple{2,Int}, offsets::NTuple{2,Int}) +function crop_blob(input::Array{T}, output::Array{T}, crop_size::NTuple{2,Int}, offsets::NTuple{2,Int}) where {T} crop_w = crop_size[1]; w_off = offsets[1] crop_h = crop_size[2]; h_off = offsets[2] num = size(input, 4); channels = size(input, 3) @@ -48,7 +48,7 @@ function crop_blob{T}(input::Array{T}, output::Array{T}, crop_size::NTuple{2,Int end end end -function 
mirror_crop_blob{T}(input::Array{T}, output::Array{T}, crop_size::NTuple{2,Int}, offsets::NTuple{2,Int}) +function mirror_crop_blob(input::Array{T}, output::Array{T}, crop_size::NTuple{2,Int}, offsets::NTuple{2,Int}) where {T} crop_w = crop_size[1]; w_off = offsets[1] crop_h = crop_size[2]; h_off = offsets[2] num = size(input, 4); channels = size(input, 3) diff --git a/src/layers/dropout.jl b/src/layers/dropout.jl index b00002c..3d52171 100644 --- a/src/layers/dropout.jl +++ b/src/layers/dropout.jl @@ -19,7 +19,7 @@ is_inplace => true ) -type DropoutLayerState{T} <: LayerState +struct DropoutLayerState{T} <: LayerState layer :: DropoutLayer rand_vals :: Blob @@ -51,7 +51,7 @@ function shutdown(backend::Backend, state::DropoutLayerState) destroy_etc(backend, state) end -function dropout_forward{T}(input::Array{T}, rand_vals::Array{T}, ratio::T, scale::T) +function dropout_forward(input::Array{T}, rand_vals::Array{T}, ratio::T, scale::T) where {T} len = length(input) @simd for i = 1:len @inbounds input[i] = input[i] * (rand_vals[i] > ratio) * scale @@ -62,7 +62,7 @@ function forward(backend::CPUBackend, state::DropoutLayerState, inputs::Vector{B dropout_forward(inputs[1].data, state.rand_vals.data, state.ratio, state.scale) end -function dropout_backward{T}(grad::Array{T}, rand_vals::Array{T}, ratio::T, scale::T) +function dropout_backward(grad::Array{T}, rand_vals::Array{T}, ratio::T, scale::T) where {T} len = length(grad) @simd for i = 1:len @inbounds grad[i] = grad[i] * (rand_vals[i] > ratio) * scale @@ -73,4 +73,3 @@ function backward(backend::CPUBackend, state::DropoutLayerState, inputs::Vector{ dropout_backward(diffs[1].data, state.rand_vals.data, state.ratio, state.scale) end end - diff --git a/src/layers/element-wise.jl b/src/layers/element-wise.jl index fa63c04..36500fb 100644 --- a/src/layers/element-wise.jl +++ b/src/layers/element-wise.jl @@ -2,17 +2,17 @@ export ElementWiseFunctorType, ElementWiseFunctors export get_num_args @compat abstract type ElementWiseFunctorType{NArg} end -get_num_args{NArg}(::ElementWiseFunctorType{NArg}) = NArg +get_num_args(::ElementWiseFunctorType{NArg}) where {NArg} = NArg module ElementWiseFunctors using ..Mocha -type Add <: ElementWiseFunctorType{2} +struct Add <: ElementWiseFunctorType{2} end -type Subtract <: ElementWiseFunctorType{2} +struct Subtract <: ElementWiseFunctorType{2} end -type Multiply <: ElementWiseFunctorType{2} +struct Multiply <: ElementWiseFunctorType{2} end -type Divide <: ElementWiseFunctorType{2} +struct Divide <: ElementWiseFunctorType{2} end end # module ElementWiseFunctors @@ -29,7 +29,7 @@ end # module ElementWiseFunctors can_do_bp => true ) -type ElementWiseLayerState{Op<:ElementWiseFunctorType} <: LayerState +struct ElementWiseLayerState{Op<:ElementWiseFunctorType} <: LayerState layer :: ElementWiseLayer blobs :: Vector{Blob} blobs_diff :: Vector{Blob} @@ -59,7 +59,7 @@ for (functor, op) in ((ElementWiseFunctors.Add, (+)), # I'm getting the following warning unless I extract the for loop # as a separate function with clear type annotations. # Warning: could not attach metadata for @simd loop. 
- function functor_impl{T}(::$functor, input1::Array{T}, input2::Array{T}, output::Array{T}) + function functor_impl(::$functor, input1::Array{T}, input2::Array{T}, output::Array{T}) where {T} len = length(input1) @simd for i = 1:len @inbounds output[i] = $op(input1[i], input2[i]) @@ -120,4 +120,3 @@ function backward(backend::CPUBackend, state::ElementWiseLayerState{ElementWiseF BLAS.scal!(length(diffs[2]), convert(eltype(diffs[2]),-1), diffs[2].data, 1) end end - diff --git a/src/layers/gaussian-kl-loss.jl b/src/layers/gaussian-kl-loss.jl index 8e61383..22a45dc 100644 --- a/src/layers/gaussian-kl-loss.jl +++ b/src/layers/gaussian-kl-loss.jl @@ -20,7 +20,7 @@ has_stats => true, ) -type GaussianKLLossLayerState{T, B<:Blob} <: LayerState +struct GaussianKLLossLayerState{T, B<:Blob} <: LayerState layer :: GaussianKLLossLayer loss :: T loss_accum :: T diff --git a/src/layers/hdf5-output.jl b/src/layers/hdf5-output.jl index afe9f14..1674438 100644 --- a/src/layers/hdf5-output.jl +++ b/src/layers/hdf5-output.jl @@ -11,7 +11,7 @@ using HDF5 is_sink => true ) -type HDF5OutputLayerState <: LayerState +struct HDF5OutputLayerState <: LayerState layer :: HDF5OutputLayer file :: HDF5File buffer :: Vector{Array} diff --git a/src/layers/hinge-loss.jl b/src/layers/hinge-loss.jl index 7ca8d2e..d4c4a58 100644 --- a/src/layers/hinge-loss.jl +++ b/src/layers/hinge-loss.jl @@ -16,7 +16,7 @@ has_stats => true, ) -type HingeLossLayerState{T} <: LayerState +struct HingeLossLayerState{T} <: LayerState layer :: HingeLossLayer loss :: T @@ -82,12 +82,12 @@ function forward(backend::CPUBackend, state::HingeLossLayerState, inputs::Vector end function backward(backend::CPUBackend, state::HingeLossLayerState, inputs::Vector{Blob}, diffs::Vector{Blob}) - const pred = inputs[1] - const label = inputs[2] + pred = inputs[1] + label = inputs[2] - const data_type = eltype(pred) - const n = length(pred) - const num = get_num(pred) + data_type = eltype(pred) + n = length(pred) + num = get_num(pred) if isa(diffs[1], CPUBlob) erase!(diffs[1]) diff --git a/src/layers/identity.jl b/src/layers/identity.jl index 0d3e6f0..bb895a8 100644 --- a/src/layers/identity.jl +++ b/src/layers/identity.jl @@ -10,7 +10,7 @@ can_do_bp => true ) -type IdentityLayerState <: LayerState +struct IdentityLayerState <: LayerState layer :: IdentityLayer blobs :: Vector{Blob} blobs_diff :: Vector{Blob} diff --git a/src/layers/index2onehot.jl b/src/layers/index2onehot.jl index 50bdd25..5877ae8 100644 --- a/src/layers/index2onehot.jl +++ b/src/layers/index2onehot.jl @@ -6,7 +6,7 @@ (n_class :: Int = 0, n_class > 0) ) -type Index2OnehotLayerState <: LayerState +struct Index2OnehotLayerState <: LayerState layer :: Index2OnehotLayer blobs :: Vector{Blob} diff --git a/src/layers/inner-product.jl b/src/layers/inner-product.jl index 7028919..4fa4b7b 100644 --- a/src/layers/inner-product.jl +++ b/src/layers/inner-product.jl @@ -20,7 +20,7 @@ has_neuron => true ) -type InnerProductLayerState <: LayerState +struct InnerProductLayerState <: LayerState layer :: InnerProductLayer blobs :: Vector{Blob} blobs_diff :: Vector{Blob} diff --git a/src/layers/lrn.jl b/src/layers/lrn.jl index 87a2f4c..9e2d0f0 100644 --- a/src/layers/lrn.jl +++ b/src/layers/lrn.jl @@ -3,8 +3,8 @@ export LRNModeType, LRNMode @compat abstract type LRNModeType end module LRNMode import ..LRNModeType -type AcrossChannel <: LRNModeType end -type WithinChannel <: LRNModeType end +struct AcrossChannel <: LRNModeType end +struct WithinChannel <: LRNModeType end end # module LRNMode 
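# A caveat worth flagging for the type -> struct conversions made throughout
# this patch: 0.6 `type` declared a mutable composite, while 1.0 `struct` is
# immutable. Any converted state whose fields are reassigned after construction
# (for instance SolverState, whose fields are overwritten in copy_solver_state!
# later in this patch) has to become `mutable struct` instead. Minimal sketch
# with hypothetical counter types:
struct FrozenCounter
    n::Int
end
mutable struct Counter
    n::Int
end
c = Counter(0)
c.n += 1                    # allowed: mutable struct fields can be reassigned
# FrozenCounter(0).n += 1   # errors: fields of an immutable struct cannot be changed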
################################################################################ @@ -25,7 +25,7 @@ end # module LRNMode can_do_bp => true ) -type LRNLayerState <: LayerState +struct LRNLayerState <: LayerState layer :: LRNLayer blobs :: Vector{Blob} blobs_diff :: Vector{Blob} diff --git a/src/layers/memory-output.jl b/src/layers/memory-output.jl index 1e25b4f..cd04735 100644 --- a/src/layers/memory-output.jl +++ b/src/layers/memory-output.jl @@ -8,7 +8,7 @@ export reset_outputs is_sink => true ) -type MemoryOutputLayerState <: LayerState +struct MemoryOutputLayerState <: LayerState layer :: MemoryOutputLayer outputs :: Vector{Vector{Array}} end diff --git a/src/layers/multinomial-logistic-loss.jl b/src/layers/multinomial-logistic-loss.jl index 7b5763a..ed2d543 100644 --- a/src/layers/multinomial-logistic-loss.jl +++ b/src/layers/multinomial-logistic-loss.jl @@ -14,7 +14,7 @@ is_sink => true, ) -type MultinomialLogisticLossLayerState{T} <: LayerState +struct MultinomialLogisticLossLayerState{T} <: LayerState layer :: MultinomialLogisticLossLayer loss :: T @@ -96,7 +96,7 @@ function forward(backend::CPUBackend, state::MultinomialLogisticLossLayerState, map(x -> round(Int, x), label) .+ 1 else dim = dims[i] - reshape(1:dim, [j == i? dim : 1 for j = 1:length(dims)]...) + reshape(1:dim, [j == i ? dim : 1 for j = 1:length(dims)]...) end end @@ -111,4 +111,3 @@ end function backward(backend::Backend, state::MultinomialLogisticLossLayerState, inputs::Vector{Blob}, diffs::Vector{Blob}) end - diff --git a/src/layers/pooling.jl b/src/layers/pooling.jl index 643a49c..3fa7154 100644 --- a/src/layers/pooling.jl +++ b/src/layers/pooling.jl @@ -13,7 +13,7 @@ has_neuron => true ) -type PoolingLayerState <: LayerState +struct PoolingLayerState <: LayerState layer :: PoolingLayer blobs :: Vector{Blob} blobs_diff :: Vector{Blob} diff --git a/src/layers/pooling/channel-pooling.jl b/src/layers/pooling/channel-pooling.jl index 3af3c3d..536d299 100644 --- a/src/layers/pooling/channel-pooling.jl +++ b/src/layers/pooling/channel-pooling.jl @@ -1,7 +1,7 @@ ################################################################################ # Pooling in channels ################################################################################ -function max_channel_pooling_forward{T}(input::Array{T,3}, output::Array{T,3}, mask::Array{Csize_t,3}, layer) +function max_channel_pooling_forward(input::Array{T,3}, output::Array{T,3}, mask::Array{Csize_t,3}, layer) where {T} spatial_dim, channels, num = size(input) pooled_chann = size(output, 2) @@ -30,7 +30,7 @@ function max_channel_pooling_forward{T}(input::Array{T,3}, output::Array{T,3}, m end end -function mean_channel_pooling_forward{T}(input::Array{T,3}, output::Array{T,3}, integral::Array{T}, layer) +function mean_channel_pooling_forward(input::Array{T,3}, output::Array{T,3}, integral::Array{T}, layer) where {T} spatial_dim_T, channels, num = size(input) pooled_chann = size(output, 2) one = convert(T, 1) @@ -73,7 +73,7 @@ function mean_channel_pooling_forward{T}(input::Array{T,3}, output::Array{T,3}, end end -function max_channel_pooling_backward{T}(input::Array{T,3}, output::Array{T,3}, mask::Array{Csize_t,3}, layer) +function max_channel_pooling_backward(input::Array{T,3}, output::Array{T,3}, mask::Array{Csize_t,3}, layer) where {T} spatial_dim, channels, num = size(input) pooled_chann = size(output, 2) @@ -91,7 +91,7 @@ function max_channel_pooling_backward{T}(input::Array{T,3}, output::Array{T,3}, end end -function mean_channel_pooling_backward{T}(input::Array{T,3}, 
output::Array{T,3}, layer) +function mean_channel_pooling_backward(input::Array{T,3}, output::Array{T,3}, layer) where {T} spatial_dim_T, channels, num = size(input) pooled_chann = size(output, 2) scale = 1/convert(T, layer.kernel) @@ -119,4 +119,3 @@ function mean_channel_pooling_backward{T}(input::Array{T,3}, output::Array{T,3}, end end end - diff --git a/src/layers/pooling/julia-impl.jl b/src/layers/pooling/julia-impl.jl index 831426d..f0f8552 100644 --- a/src/layers/pooling/julia-impl.jl +++ b/src/layers/pooling/julia-impl.jl @@ -1,7 +1,7 @@ ################################################################################ # Pooling in image dimension (width and height) ################################################################################ -function max_pooling_forward{T}(input::Array{T}, output::Array{T}, mask::Array{Csize_t}, layer) +function max_pooling_forward(input::Array{T}, output::Array{T}, mask::Array{Csize_t}, layer) where {T} width, height, channels, num = size(input) pooled_width = size(output, 1) pooled_height = size(output, 2) @@ -38,7 +38,7 @@ function max_pooling_forward{T}(input::Array{T}, output::Array{T}, mask::Array{C end end -function mean_pooling_forward{T}(input::Array{T}, output::Array{T}, layer) +function mean_pooling_forward(input::Array{T}, output::Array{T}, layer) where {T} width, height, channels, num = size(input) pooled_width = size(output, 1) pooled_height = size(output, 2) @@ -68,7 +68,7 @@ function mean_pooling_forward{T}(input::Array{T}, output::Array{T}, layer) end end -function max_pooling_backward{T}(input::Array{T}, output::Array{T}, mask::Array{Csize_t}, layer) +function max_pooling_backward(input::Array{T}, output::Array{T}, mask::Array{Csize_t}, layer) where {T} width, height, channels, num = size(input) pooled_width = size(output, 1) pooled_height = size(output, 2) @@ -88,7 +88,7 @@ function max_pooling_backward{T}(input::Array{T}, output::Array{T}, mask::Array{ end end -function mean_pooling_backward{T}(input::Array{T}, output::Array{T}, layer) +function mean_pooling_backward(input::Array{T}, output::Array{T}, layer) where {T} width, height, channels, num = size(input) pooled_width = size(output, 1) pooled_height = size(output, 2) @@ -117,4 +117,3 @@ function mean_pooling_backward{T}(input::Array{T}, output::Array{T}, layer) end end end - diff --git a/src/layers/power.jl b/src/layers/power.jl index fd21904..0772e52 100644 --- a/src/layers/power.jl +++ b/src/layers/power.jl @@ -13,7 +13,7 @@ can_do_bp => true ) -type PowerLayerState <: LayerState +struct PowerLayerState <: LayerState layer :: PowerLayer blobs :: Vector{Blob} blobs_diff :: Vector{Blob} diff --git a/src/layers/random-mask.jl b/src/layers/random-mask.jl index 9bd7a34..1234faf 100644 --- a/src/layers/random-mask.jl +++ b/src/layers/random-mask.jl @@ -8,7 +8,7 @@ is_inplace => true, ) -type RandomMaskLayerState <: LayerState +struct RandomMaskLayerState <: LayerState layer :: RandomMaskLayer dropouts :: Vector{DropoutLayerState} diff --git a/src/layers/random-normal.jl b/src/layers/random-normal.jl index d640b5f..050d70e 100644 --- a/src/layers/random-normal.jl +++ b/src/layers/random-normal.jl @@ -11,7 +11,7 @@ is_source => true ) -type RandomNormalLayerState <: LayerState +struct RandomNormalLayerState <: LayerState layer :: RandomNormalLayer blobs :: Vector{Blob} etc :: Vector{Any} diff --git a/src/layers/reshape.jl b/src/layers/reshape.jl index a20e83b..41d0750 100644 --- a/src/layers/reshape.jl +++ b/src/layers/reshape.jl @@ -9,7 +9,7 @@ can_do_bp => true, # 
back-propagate via upper layers ) -type ReshapeLayerState <: LayerState +struct ReshapeLayerState <: LayerState layer :: ReshapeLayer blobs :: Vector{Blob} blobs_diff :: Vector{Blob} diff --git a/src/layers/softlabel-softmax-loss.jl b/src/layers/softlabel-softmax-loss.jl index 7fa88c7..f59a51d 100644 --- a/src/layers/softlabel-softmax-loss.jl +++ b/src/layers/softlabel-softmax-loss.jl @@ -10,7 +10,7 @@ is_sink => true ) -type SoftlabelSoftmaxLossLayerState{T} <: LayerState +struct SoftlabelSoftmaxLossLayerState{T} <: LayerState layer :: SoftlabelSoftmaxLossLayer loss :: T diff --git a/src/layers/softmax-loss.jl b/src/layers/softmax-loss.jl index e9a7ef5..ae0d438 100644 --- a/src/layers/softmax-loss.jl +++ b/src/layers/softmax-loss.jl @@ -15,7 +15,7 @@ is_sink => true ) -type SoftmaxLossLayerState{T} <: LayerState +struct SoftmaxLossLayerState{T} <: LayerState layer :: SoftmaxLossLayer loss :: T @@ -59,7 +59,7 @@ function backward(backend::CPUBackend, state::SoftmaxLossLayerState, inputs::Vec map(x -> round(Int, x), label) .+ 1 else dim = dims[i] - reshape(1:dim, [j == i? dim : 1 for j = 1:length(dims)]...) + reshape(1:dim, [j == i ? dim : 1 for j = 1:length(dims)]...) end end @@ -80,4 +80,3 @@ function backward(backend::CPUBackend, state::SoftmaxLossLayerState, inputs::Vec Vec.mul_scal!(diff.data, state.layer.weight * dims[state.logistic.op_dim]/prod(dims)) end end - diff --git a/src/layers/softmax.jl b/src/layers/softmax.jl index ae6475e..890a8ad 100644 --- a/src/layers/softmax.jl +++ b/src/layers/softmax.jl @@ -11,7 +11,7 @@ can_do_bp => true, ) -type SoftmaxLayerState <: LayerState +struct SoftmaxLayerState <: LayerState layer :: SoftmaxLayer blobs :: Vector{Blob} blobs_diff :: Vector{Blob} diff --git a/src/layers/split.jl b/src/layers/split.jl index 86204f9..74dee39 100644 --- a/src/layers/split.jl +++ b/src/layers/split.jl @@ -11,7 +11,7 @@ can_do_bp => true ) -type SplitLayerState{N} <: LayerState +struct SplitLayerState{N} <: LayerState layer :: SplitLayer blobs :: Vector{Blob} blobs_diff :: Vector{Blob} @@ -57,7 +57,7 @@ function forward(backend::Backend, state::SplitLayerState, inputs::Vector{Blob}) end end -function backward{N}(backend::CPUBackend, state::SplitLayerState{N}, inputs::Vector{Blob}, diffs::Vector{Blob}) +function backward(backend::CPUBackend, state::SplitLayerState{N}, inputs::Vector{Blob}, diffs::Vector{Blob}) where {N} if !isa(diffs[1], NullBlob) diff = diffs[1] len = length(diff) @@ -67,4 +67,3 @@ function backward{N}(backend::CPUBackend, state::SplitLayerState{N}, inputs::Vec end end end - diff --git a/src/layers/square-loss.jl b/src/layers/square-loss.jl index 03c25c3..00aafe5 100644 --- a/src/layers/square-loss.jl +++ b/src/layers/square-loss.jl @@ -15,7 +15,7 @@ has_stats => true, ) -type SquareLossLayerState{T} <: LayerState +struct SquareLossLayerState{T} <: LayerState layer :: SquareLossLayer loss :: T diff --git a/src/layers/tied-inner-product.jl b/src/layers/tied-inner-product.jl index b59b540..d458de9 100644 --- a/src/layers/tied-inner-product.jl +++ b/src/layers/tied-inner-product.jl @@ -16,7 +16,7 @@ has_neuron => true ) -type TiedInnerProductLayerState <: LayerState +struct TiedInnerProductLayerState <: LayerState layer :: TiedInnerProductLayer blobs :: Vector{Blob} blobs_diff :: Vector{Blob} diff --git a/src/layers/wasserstein-loss.jl b/src/layers/wasserstein-loss.jl index b62c90e..bcb43ac 100644 --- a/src/layers/wasserstein-loss.jl +++ b/src/layers/wasserstein-loss.jl @@ -11,7 +11,7 @@ is_sink => true, ) -type WassersteinLossLayerState{T} <: 
LayerState +struct WassersteinLossLayerState{T} <: LayerState layer :: WassersteinLossLayer loss :: T diff --git a/src/net.jl b/src/net.jl index 5f4a26e..bd3b852 100644 --- a/src/net.jl +++ b/src/net.jl @@ -3,7 +3,7 @@ export init, destroy, forward, forward_epoch, backward, forward_backward, get_ep export get_layer, get_layer_state, freeze!, unfreeze!, freeze_all!, unfreeze_all! export dump_statistics, reset_statistics -type Net{T <: Backend} +struct Net{T <: Backend} name :: AbstractString backend :: T diff --git a/src/solvers.jl b/src/solvers.jl index d650491..6e4aed3 100644 --- a/src/solvers.jl +++ b/src/solvers.jl @@ -21,20 +21,20 @@ Solver(method::T, params::SolverParameters) where {T} = begin Solver(method, params, CoffeeLounge()) end -type SolverState{T<:InternalSolverState} +struct SolverState{T<:InternalSolverState} iter :: Int obj_val :: Float64 losses :: Dict internal :: T end -function copy_solver_state!{T<:InternalSolverState}(dst::SolverState{T}, src::SolverState{T}) +function copy_solver_state!(dst::SolverState{T}, src::SolverState{T}) where {T<:InternalSolverState} dst.iter = src.iter dst.obj_val = src.obj_val dst.losses = src.losses dst.internal = src.internal end -SolverState{T<:InternalSolverState}(internal::T) = SolverState{T}(0, Inf, Dict(), internal) +SolverState(internal::T) where {T<:InternalSolverState} = SolverState{T}(0, Inf, Dict(), internal) @compat abstract type SolverStateSnapshot end # Just the serializable part of the solver state, for snapshot files @@ -108,7 +108,7 @@ function solver_state(solver::SolverMethod, net::Net, params::SolverParameters) error("solver_state is not implemented for the solver type $(typeof(solver)), net type $(typeof(net)), and params type $(typeof(params))") end -function update{T}(solver::Solver{T}, net::Net, state::SolverState) # should do one iteration of update +function update(solver::Solver{T}, net::Net, state::SolverState) where {T} # should do one iteration of update error("update is not implemented for the solver type $(typeof(solver)), net type $(typeof(net)), and state type $(typeof(state))") end function shutdown(state::SolverState) # should shutdown the solver diff --git a/src/solvers/adadelta.jl b/src/solvers/adadelta.jl index 27282ed..7d60c99 100644 --- a/src/solvers/adadelta.jl +++ b/src/solvers/adadelta.jl @@ -13,13 +13,13 @@ make_solver_parameters(method::Adadelta; kwargs...) 
= validate_parameters(method::Adadelta, params::SolverParameters) = validate_parameters(params, :rho, :eps) -type AdadeltaSolverState <: InternalSolverState +struct AdadeltaSolverState <: InternalSolverState param_states :: Vector{LayerState} gradients_sq :: Vector{Vector{Blob}} deltas_sq :: Vector{Vector{Blob}} end -type AdadeltaSolverSnapshot <: SolverStateSnapshot +struct AdadeltaSolverSnapshot <: SolverStateSnapshot iteration :: Int obj_val :: Float64 end diff --git a/src/solvers/adagrad.jl b/src/solvers/adagrad.jl index 209f9e9..7c08004 100644 --- a/src/solvers/adagrad.jl +++ b/src/solvers/adagrad.jl @@ -15,12 +15,12 @@ make_solver_parameters(method::Adagrad; kwargs...)= validate_parameters(method::Adagrad, params::SolverParameters) = validate_parameters(params, :gamma, :epsilon) -type AdagradSolverState <: InternalSolverState +struct AdagradSolverState <: InternalSolverState param_states :: Vector{LayerState} param_history :: Vector{Vector{Blob}} end -type AdagradSolverSnapshot <: SolverStateSnapshot +struct AdagradSolverSnapshot <: SolverStateSnapshot iteration :: Int obj_val :: Float64 end diff --git a/src/solvers/adam.jl b/src/solvers/adam.jl index 56c1edd..5826bf9 100644 --- a/src/solvers/adam.jl +++ b/src/solvers/adam.jl @@ -16,7 +16,7 @@ validate_parameters(solver::Adam, params::SolverParameters) = begin validate_parameters(params, :lr_policy, :beta1, :beta2, :epsilon) end -type AdamSolverState <: InternalSolverState +struct AdamSolverState <: InternalSolverState param_states :: Vector{LayerState} grad_1st_moment_est :: Vector{Vector{Blob}} # Exponentially weighted moving average - biased estimate of 1st moment of gradient grad_2nd_moment_est :: Vector{Vector{Blob}} # Exponentially weighted moving average - biased estimate of raw 2nd moment of gradient @@ -24,7 +24,7 @@ type AdamSolverState <: InternalSolverState learning_rate :: Float64 end -type AdamSolverStateSnapshot <: SolverStateSnapshot +struct AdamSolverStateSnapshot <: SolverStateSnapshot iter :: Int obj_val :: Float64 grad_1st_moment_est :: Vector{Vector{Array}} diff --git a/src/solvers/nesterov.jl b/src/solvers/nesterov.jl index b1d6d4d..d47caec 100644 --- a/src/solvers/nesterov.jl +++ b/src/solvers/nesterov.jl @@ -7,7 +7,7 @@ struct Nesterov <: SolverMethod end -type NesterovSolverState <: InternalSolverState +struct NesterovSolverState <: InternalSolverState learning_rate :: Float64 momentum :: Float64 param_states :: Vector{LayerState} @@ -15,7 +15,7 @@ type NesterovSolverState <: InternalSolverState last_momentum :: Float64 end -type NesterovSolverSnapshot <: SolverStateSnapshot +struct NesterovSolverSnapshot <: SolverStateSnapshot iteration :: Int obj_val :: Float64 learning_rate :: Float64 diff --git a/src/solvers/policies.jl b/src/solvers/policies.jl index 74171a0..d861aa0 100644 --- a/src/solvers/policies.jl +++ b/src/solvers/policies.jl @@ -10,24 +10,25 @@ export LRPolicy, get_learning_rate, MomPolicy, get_momentum module LRPolicy using ..Mocha using Compat -type Fixed <: LearningRatePolicy +using Printf +struct Fixed <: LearningRatePolicy base_lr :: AbstractFloat end # base_lr * gamma ^ (floor(iter / stepsize)) -type Step <: LearningRatePolicy +struct Step <: LearningRatePolicy base_lr :: AbstractFloat gamma :: AbstractFloat stepsize :: Int end # base_lr * gamma ^ iter -type Exp <: LearningRatePolicy +struct Exp <: LearningRatePolicy base_lr :: AbstractFloat gamma :: AbstractFloat end -type Inv <: LearningRatePolicy +struct Inv <: LearningRatePolicy base_lr :: AbstractFloat gamma :: AbstractFloat power :: 
AbstractFloat @@ -56,7 +57,7 @@ function decay_on_validation_listener(policy, key::AbstractString, coffee_lounge end end -type DecayOnValidation <: LearningRatePolicy +struct DecayOnValidation <: LearningRatePolicy gamma :: AbstractFloat key :: AbstractString @@ -85,7 +86,7 @@ type DecayOnValidation <: LearningRatePolicy end using Compat -type Staged <: LearningRatePolicy +struct Staged <: LearningRatePolicy stages :: Vector{@compat(Tuple{Int, LearningRatePolicy})} curr_stage :: Int @@ -156,21 +157,21 @@ end ############################################################ module MomPolicy -using ..Mocha.MomentumPolicy +using ..Mocha: MomentumPolicy using Compat -type Fixed <: MomentumPolicy +struct Fixed <: MomentumPolicy base_mom :: AbstractFloat end # min(base_mom * gamma ^ (floor(iter / stepsize)), max_mom) -type Step <: MomentumPolicy +struct Step <: MomentumPolicy base_mom :: AbstractFloat gamma :: AbstractFloat stepsize :: Int max_mom :: AbstractFloat end -type Linear <: MomentumPolicy +struct Linear <: MomentumPolicy base_mom :: AbstractFloat gamma :: AbstractFloat stepsize :: Int @@ -178,7 +179,7 @@ type Linear <: MomentumPolicy end using Compat -type Staged <: MomentumPolicy +struct Staged <: MomentumPolicy stages :: Vector{@compat(Tuple{Int, MomentumPolicy})} curr_stage :: Int diff --git a/src/solvers/sgd.jl b/src/solvers/sgd.jl index f4ea7d4..6779cce 100644 --- a/src/solvers/sgd.jl +++ b/src/solvers/sgd.jl @@ -10,7 +10,7 @@ make_solver_parameters(method::SGD; kwargs...) = merge(make_solver_parameters(), defaultDict, SolverParameters(kwargs)) -type SGDSolverState <: InternalSolverState +struct SGDSolverState <: InternalSolverState learning_rate :: Float64 momentum :: Float64 param_states :: Vector{LayerState} @@ -18,7 +18,7 @@ type SGDSolverState <: InternalSolverState last_momentum :: Float64 end -type SGDSolverSnapshot <: SolverStateSnapshot +struct SGDSolverSnapshot <: SolverStateSnapshot iteration :: Int obj_val :: Float64 learning_rate :: Float64 diff --git a/src/utils/gradient-checking.jl b/src/utils/gradient-checking.jl index 94cd71f..f0307e0 100644 --- a/src/utils/gradient-checking.jl +++ b/src/utils/gradient-checking.jl @@ -177,7 +177,7 @@ end ############################################################### function test_gradients(net::Net; epsilon=1e-6, digit=6, visual=true ) return typeof(net.backend) == Mocha.CPUBackend ? 
- gradient_check( net, epsilon, digit, visual ) :false + gradient_check( net, epsilon, digit, visual ) : false end diff --git a/test/utils/ref-count.jl b/test/utils/ref-count.jl index 94be6f8..98d1a68 100644 --- a/test/utils/ref-count.jl +++ b/test/utils/ref-count.jl @@ -1,4 +1,4 @@ -type MockResource +struct MockResource resource :: Vector{Int} rc :: RefCounter end diff --git a/tools/image-classifier.jl b/tools/image-classifier.jl index 7e3ce20..74de7f3 100644 --- a/tools/image-classifier.jl +++ b/tools/image-classifier.jl @@ -4,7 +4,7 @@ using Colors # requires Package Colors.jl using Mocha using Compat -type ImageClassifier +struct ImageClassifier net :: Net channel_order :: NTuple{3,Int} # The channel order of the trained net, (1,2,3) means RGB sp_order :: NTuple{2,Int} # The spatial order (1,2) means width-height (row major) From 5f640953d589634008b05f130e2fe1db02cc8502 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Tue, 20 Nov 2018 01:39:44 -0500 Subject: [PATCH 09/24] correcting erroneous Array typing edits --- src/cuda/blob.jl | 3 +-- src/layers/crop.jl | 2 +- test/layers/random-normal.jl | 10 +++++----- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/cuda/blob.jl b/src/cuda/blob.jl index bf8a15b..79fb0c3 100644 --- a/src/cuda/blob.jl +++ b/src/cuda/blob.jl @@ -30,7 +30,7 @@ function copy!{T}(dst :: CuTensorBlob{T}, src :: CuTensorBlob{T}) @CUDA.cucall(:cuMemcpy, (Ptr{Nothing}, Ptr{Nothing}, Cint), dst.ptr.p, src.ptr.p, length(dst)*sizeof(T)) end function fill!{T}(dst :: CuTensorBlob{T}, val) - val_vec = Array{T}(length(dst)) + val_vec = Array{T}(undef,length(dst)) fill!(val_vec, val) copy!(dst, val_vec) end @@ -51,4 +51,3 @@ function destroy(blob :: CuTensorBlob) blob.ptr.p = 0 end end - diff --git a/src/layers/crop.jl b/src/layers/crop.jl index 14f463f..a4e9f80 100644 --- a/src/layers/crop.jl +++ b/src/layers/crop.jl @@ -18,7 +18,7 @@ function setup(backend::Backend, layer::CropLayer, inputs::Vector{Blob}, diffs:: @assert isa(diffs[i], NullBlob) # Back-propagation for crop-layer is not implemented end - blobs = Array(length(inputs)) where {Blob} + blobs = Array{Blob}(undef,length(inputs)) for i = 1:length(inputs) width, height, channels, num = size(inputs[i]) @assert layer.crop_size[1] <= width && layer.crop_size[2] <= height diff --git a/test/layers/random-normal.jl b/test/layers/random-normal.jl index f90dfcc..a9ef971 100644 --- a/test/layers/random-normal.jl +++ b/test/layers/random-normal.jl @@ -21,14 +21,14 @@ function test_random_normal_layer(backend::Backend, T, eps) eltype=T, batch_sizes=batch_sizes) state = setup(backend, layer, Blob[], Blob[]) - layer_data = [Array(tuple(output_dims..., batch_sizes[i])) where {T} + layer_data = [Array{T}(undef,tuple(output_dims..., batch_sizes[i])) for i in 1:N] forward(backend, state, Blob[]) for i in 1:N copy!(layer_data[i], state.blobs[i]) @test (abs(mean(layer_data[i])) < 4e-1) - @test all(-1000 .< layer_data[i] .< 1000) + @test all(-1000 .< layer_data[i] .< 1000) end # we should have sample from zero mean, unit stddev in state.blobs[1] @@ -36,9 +36,9 @@ function test_random_normal_layer(backend::Backend, T, eps) # output should be different on subsequent calls - layer_data2 = [Array(tuple(output_dims..., batch_sizes[i])) where {T} + layer_data2 = [Array{T}(undef,tuple(output_dims..., batch_sizes[i])) for i in 1:N] - + forward(backend, state, Blob[]) for i in 1:N copy!(layer_data2[i], state.blobs[i]) @@ -59,7 +59,7 @@ end if test_gpu test_random_normal_layer(backend_gpu) end - + if test_cpu 
test_random_normal_layer(backend_cpu) end From 3977f4e7519bcead91d60a06f04772f30172fbb2 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Tue, 20 Nov 2018 13:56:08 -0500 Subject: [PATCH 10/24] Corrected UUID and added repo addres to Project.toml --- Project.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 43b916f..c9b1a22 100644 --- a/Project.toml +++ b/Project.toml @@ -1,5 +1,6 @@ name = "Mocha" -uuid = "8ac9dfb8-ec70-11e8-02ea-0ddc18c31797" +uuid = "f17d6557-5fdd-57bf-a30c-27e301b4ff87" +repo = "https://github.com/pluskid/Mocha.jl.git" [deps] Compat = "34da2185-b29b-5c13-b0c7-acf172513d20" From 7a282649d3983c2e5082c8dc42c476195a9ceabc Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Tue, 20 Nov 2018 17:04:00 -0500 Subject: [PATCH 11/24] initializing arrays with undef --- benchmarks/native-im2col/im2col-bm.jl | 2 +- src/blob.jl | 4 ++-- src/cuda/cuda.jl | 6 +++--- src/cuda/layers/channel-pooling.jl | 4 ++-- src/cuda/layers/convolution.jl | 6 +++--- src/cuda/layers/hinge-loss.jl | 2 +- src/cuda/layers/pooling.jl | 4 ++-- src/cuda/layers/softmax.jl | 4 ++-- src/layers/argmax.jl | 4 ++-- src/layers/async-hdf5-data.jl | 6 +++--- src/layers/channel-pooling.jl | 16 ++++++++-------- src/layers/convolution.jl | 8 ++++---- src/layers/hdf5-data.jl | 4 ++-- src/layers/hdf5-output.jl | 6 +++--- src/layers/index2onehot.jl | 4 ++-- src/layers/inner-product.jl | 6 +++--- src/layers/lrn.jl | 12 ++++++------ src/layers/memory-data.jl | 4 ++-- src/layers/memory-output.jl | 2 +- src/layers/pooling.jl | 12 ++++++------ src/layers/power.jl | 2 +- src/layers/random-mask.jl | 2 +- src/layers/random-normal.jl | 2 +- src/layers/softmax-loss.jl | 4 ++-- src/layers/split.jl | 4 ++-- src/layers/tied-inner-product.jl | 6 +++--- src/net.jl | 6 +++--- src/solvers/adadelta.jl | 4 ++-- src/solvers/adagrad.jl | 2 +- src/solvers/adam.jl | 8 ++++---- src/solvers/policies.jl | 4 ++-- src/solvers/sgd.jl | 2 +- src/utils/gradient-checking.jl | 6 +++--- test/layers/argmax.jl | 2 +- test/layers/channel-pooling.jl | 4 ++-- test/layers/concat.jl | 2 +- test/layers/convolution.jl | 4 ++-- test/layers/crop.jl | 4 ++-- test/layers/element-wise.jl | 2 +- test/layers/hdf5-data.jl | 2 +- test/layers/index2onehot.jl | 2 +- test/layers/inner-product.jl | 2 +- test/layers/memory-data.jl | 2 +- test/layers/memory-output.jl | 2 +- test/layers/pooling.jl | 4 ++-- test/layers/power.jl | 2 +- test/layers/reshape.jl | 2 +- test/layers/softmax.jl | 2 +- tools/image-classifier.jl | 4 ++-- 49 files changed, 105 insertions(+), 105 deletions(-) diff --git a/benchmarks/native-im2col/im2col-bm.jl b/benchmarks/native-im2col/im2col-bm.jl index 2cb06fd..5ee8e1b 100644 --- a/benchmarks/native-im2col/im2col-bm.jl +++ b/benchmarks/native-im2col/im2col-bm.jl @@ -74,7 +74,7 @@ img = rand(width, height, channels) width_out = div(width + 2*pad[1]-kernel[1], stride[1]) + 1 height_out = div(height + 2*pad[2]-kernel[2], stride[2]) + 1 -col_buffer = Array{Float64}(width_out, height_out, channels*prod(kernel)) +col_buffer = Array{Float64}(undef,width_out, height_out, channels*prod(kernel)) col_buffer2 = zeros(size(col_buffer)) im2col_jl() = im2col(img, col_buffer, width, height, channels, kernel, pad, stride) diff --git a/src/blob.jl b/src/blob.jl index 0bec094..733a72b 100644 --- a/src/blob.jl +++ b/src/blob.jl @@ -66,7 +66,7 @@ function show(io::IO, blob :: Blob) end function to_array(blob::Blob) - array = Array{eltype(blob)}(size(blob)) + array = Array{eltype(blob)}(undef,size(blob)) 
copy!(array, blob) array end @@ -129,7 +129,7 @@ end struct CPUBlob{T <: AbstractFloat, N} <: Blob{T, N} data :: AbstractArray{T, N} end -CPUBlob(t :: Type, dims::NTuple{N,Int}) where {N} = CPUBlob(Array{t}(dims)) +CPUBlob(t :: Type, dims::NTuple{N,Int}) where {N} = CPUBlob(undef,Array{t}(undef,dims)) function make_blob(backend::CPUBackend, data_type::Type, dims::NTuple{N,Int}) where {N} return CPUBlob(data_type, dims) diff --git a/src/cuda/cuda.jl b/src/cuda/cuda.jl index 3316898..6675693 100644 --- a/src/cuda/cuda.jl +++ b/src/cuda/cuda.jl @@ -128,7 +128,7 @@ const CTX_MAP_HOST = 0x08 const CTX_LMEM_RESIZE_TO_MAX = 0x10 function create_context(dev::CuDevice, flags::Integer) - a = Array{Ptr{Nothing}}(1) + a = Array{Ptr{Nothing}}(undef,1) @cucall(:cuCtxCreate_v2, (Ptr{Ptr{Nothing}}, Cuint, Cint), a, flags, dev.handle) return CuContext(a[1]) end @@ -193,7 +193,7 @@ struct CuModule handle::Ptr{Nothing} function CuModule(filename::AbstractString) - a = Array{Ptr{Nothing}}(1) + a = Array{Ptr{Nothing}}(undef,1) @cucall(:cuModuleLoad, (Ptr{Ptr{Nothing}}, Ptr{Cchar}), a, filename) new(a[1]) end @@ -208,7 +208,7 @@ struct CuFunction handle::Ptr{Nothing} function CuFunction(md::CuModule, name::String) - a = Array{Ptr{Nothing}}(1) + a = Array{Ptr{Nothing}}(undef,1) @cucall(:cuModuleGetFunction, (Ptr{Ptr{Nothing}}, Ptr{Nothing}, Ptr{Cchar}), a, md.handle, name) new(a[1]) diff --git a/src/cuda/layers/channel-pooling.jl b/src/cuda/layers/channel-pooling.jl index dd09ff0..b489e46 100644 --- a/src/cuda/layers/channel-pooling.jl +++ b/src/cuda/layers/channel-pooling.jl @@ -1,12 +1,12 @@ function setup_etc(backend::GPUBackend, layer::ChannelPoolingLayer, inputs, blobs) if isa(layer.pooling, Pooling.Max) - masks = Array{CuPtr}(length(inputs)) + masks = Array{CuPtr}(undef,length(inputs)) for i = 1:length(inputs) masks[i] = CUDA.cualloc(Csize_t, length(blobs[i])) end etc = masks elseif isa(layer.pooling, Pooling.Mean) - integrals = Array{CuPtr}(length(inputs)) + integrals = Array{CuPtr}(undef,length(inputs)) for i = 1:length(inputs) integrals[i] = CUDA.cualloc(eltype(inputs[i]), prod(size(inputs[i])[1:end-1])) end diff --git a/src/cuda/layers/convolution.jl b/src/cuda/layers/convolution.jl index 8ef6d1e..9a8fc8e 100644 --- a/src/cuda/layers/convolution.jl +++ b/src/cuda/layers/convolution.jl @@ -22,9 +22,9 @@ function setup_etc(backend::GPUBackend, layer::ConvolutionLayer, dtype, width, h div(channels,layer.n_group), div(layer.n_filter,layer.n_group))) bias_desc = CuDNN.create_tensor4d_descriptor(dtype, (1,1,div(layer.n_filter,layer.n_group),1)) - inputs_desc = Array{CuDNN.Tensor4dDescriptor}(length(inputs)) - outputs_desc = Array{CuDNN.Tensor4dDescriptor}(length(inputs)) - conv_desc = Array{CuDNN.ConvolutionDescriptor}(length(inputs)) + inputs_desc = Array{CuDNN.Tensor4dDescriptor}(undef,length(inputs)) + outputs_desc = Array{CuDNN.Tensor4dDescriptor}(undef,length(inputs)) + conv_desc = Array{CuDNN.ConvolutionDescriptor}(undef,length(inputs)) for i = 1:length(inputs) inputs_desc[i] = CuDNN.create_tensor4d_descriptor(dtype, (width,height,div(channels,layer.n_group),batch_size), (1, width, width*height, width*height*channels)) diff --git a/src/cuda/layers/hinge-loss.jl b/src/cuda/layers/hinge-loss.jl index 4132e6c..40cf89e 100644 --- a/src/cuda/layers/hinge-loss.jl +++ b/src/cuda/layers/hinge-loss.jl @@ -23,7 +23,7 @@ function forward(backend::GPUBackend, state::HingeLossLayerState, inputs::Vector CUDA.launch(kernel, (x_block,1), (CUDA.THREADS_PER_BLOCK_X, 1), (pred.ptr.p, label.ptr.p, n, 
state.loss_blob.ptr.p)) - losses = Array{data_type}(size(state.loss_blob)...) + losses = Array{data_type}(undef,size(state.loss_blob)...) copy!(losses, state.loss_blob) state.loss = state.layer.weight * sum(losses[1:x_block]) / get_num(pred) diff --git a/src/cuda/layers/pooling.jl b/src/cuda/layers/pooling.jl index 511397a..c282ecc 100644 --- a/src/cuda/layers/pooling.jl +++ b/src/cuda/layers/pooling.jl @@ -17,8 +17,8 @@ function setup_etc(backend::GPUBackend, layer::PoolingLayer, inputs, error("TODO: pooling mode $(layer.pooling) not supported by CuDNN") end pooling_desc = CuDNN.create_pooling_descriptor(pooling_mode, layer.kernel, layer.stride, layer.pad) - inputs_desc = Array{CuDNN.Tensor4dDescriptor}(length(inputs)) - outputs_desc = Array{CuDNN.Tensor4dDescriptor}(length(inputs)) + inputs_desc = Array{CuDNN.Tensor4dDescriptor}(undef,length(inputs)) + outputs_desc = Array{CuDNN.Tensor4dDescriptor}(undef,length(inputs)) for i = 1:length(inputs) width,height,channels,num = size(inputs[i]) diff --git a/src/cuda/layers/softmax.jl b/src/cuda/layers/softmax.jl index 7177411..b66c32c 100644 --- a/src/cuda/layers/softmax.jl +++ b/src/cuda/layers/softmax.jl @@ -4,8 +4,8 @@ struct CuDNNSoftmaxState end function setup_etc(backend::GPUBackend, layer::SoftmaxLayer, dims::Vector{Int}, data_type, inputs) - inputs_desc = Array{CuDNN.Tensor4dDescriptor}(length(inputs)) - outputs_desc = Array{CuDNN.Tensor4dDescriptor}(length(inputs)) + inputs_desc = Array{CuDNN.Tensor4dDescriptor}(undef,length(inputs)) + outputs_desc = Array{CuDNN.Tensor4dDescriptor}(undef,length(inputs)) for i = 1:length(inputs) dim_sp, dim_prob, dim_num = split_dims(inputs[i], dims[i]) inputs_desc[i] = CuDNN.create_tensor4d_descriptor(data_type, (1,dim_sp,dim_prob,dim_num)) diff --git a/src/layers/argmax.jl b/src/layers/argmax.jl index d157f05..ea77091 100644 --- a/src/layers/argmax.jl +++ b/src/layers/argmax.jl @@ -13,8 +13,8 @@ struct ArgmaxLayerState <: LayerState end function setup(backend::Backend, layer::ArgmaxLayer, inputs::Vector{Blob}, diffs::Vector{Blob}) - dims = Array{Int}(length(inputs)) - blobs = Array{Blob}(length(inputs)) + dims = Array{Int}(undef,length(inputs)) + blobs = Array{Blob}(undef,length(inputs)) for i = 1:length(inputs) total_dim = ndims(inputs[i]) dim = layer.dim < 0 ? 
layer.dim + total_dim + 1 : layer.dim diff --git a/src/layers/async-hdf5-data.jl b/src/layers/async-hdf5-data.jl index d576732..aa858e0 100644 --- a/src/layers/async-hdf5-data.jl +++ b/src/layers/async-hdf5-data.jl @@ -37,8 +37,8 @@ struct AsyncHDF5DataLayerState <: LayerState state.epoch = 0 # empty array, will be constructed in setup - state.blobs = Array{Blob}(length(layer.tops)) - state.trans = Array{Vector{DataTransformerState}}(length(layer.tops)) + state.blobs = Array{Blob}(undef,length(layer.tops)) + state.trans = Array{Vector{DataTransformerState}}(undef,length(layer.tops)) return state end @@ -71,7 +71,7 @@ function setup(backend::Backend, layer::AsyncHDF5DataLayer, inputs::Vector{Blob} function io_task_impl(channel) # data blocks to produce - data_blocks = Array[Array{eltype(x)}(size(x)) for x in state.blobs] + data_blocks = Array[Array{eltype(x)}(undef,size(x)) for x in state.blobs] n_done = 0 while true diff --git a/src/layers/channel-pooling.jl b/src/layers/channel-pooling.jl index 20a1e53..6b618e1 100644 --- a/src/layers/channel-pooling.jl +++ b/src/layers/channel-pooling.jl @@ -23,15 +23,15 @@ end function setup_etc(backend::CPUBackend, layer::ChannelPoolingLayer, inputs, blobs) if isa(layer.pooling, Pooling.Max) - masks = Array{Array}(length(inputs)) + masks = Array{Array}(undef,length(inputs)) for i = 1:length(inputs) - masks[i] = Array{Csize_t}(size(blobs[i])) + masks[i] = Array{Csize_t}(undef,size(blobs[i])) end etc = masks elseif isa(layer.pooling, Pooling.Mean) - integrals = Array{Array}(length(inputs)) + integrals = Array{Array}(undef,length(inputs)) for i = 1:length(inputs) - integrals[i] = Array{eltype(inputs[i])}(size(inputs[i])[1:end-1]) + integrals[i] = Array{eltype(inputs[i])}(undef,size(inputs[i])[1:end-1]) end etc = integrals else @@ -41,10 +41,10 @@ function setup_etc(backend::CPUBackend, layer::ChannelPoolingLayer, inputs, blob end function setup(backend::Backend, layer::ChannelPoolingLayer, inputs::Vector{Blob}, diffs::Vector{Blob}) - pooled_chann_all = Array{Int}(length(inputs)) - blobs = Array{Blob}(length(inputs)) - blobs_diff = Array{Blob}(length(inputs)) - op_dims = Array{Int}(length(inputs)) + pooled_chann_all = Array{Int}(undef,length(inputs)) + blobs = Array{Blob}(undef,length(inputs)) + blobs_diff = Array{Blob}(undef,length(inputs)) + op_dims = Array{Int}(undef,length(inputs)) for i = 1:length(inputs) dim_total = ndims(inputs[i]) diff --git a/src/layers/convolution.jl b/src/layers/convolution.jl index cdaa83b..370eec1 100644 --- a/src/layers/convolution.jl +++ b/src/layers/convolution.jl @@ -43,14 +43,14 @@ function setup_etc(backend::CPUBackend, layer::ConvolutionLayer, dtype, width, h layer.pad[1] == 0 && layer.pad[2] == 0 col_buffer = NullBlob() else - col_buffer = CPUBlob(Array{dtype}(width_out, height_out, channels*prod(layer.kernel), 1)) + col_buffer = CPUBlob(Array{dtype}(undef,width_out, height_out, channels*prod(layer.kernel), 1)) end M = height_out * width_out N = div(layer.n_filter, layer.n_group) K = div(channels * layer.kernel[1] * layer.kernel[2], layer.n_group) bias_multiplier = make_blob(backend, dtype, M, 1, 1, 1) fill!(bias_multiplier, convert(dtype,1)) - img_buffer = Array{dtype}(width, height, channels) + img_buffer = Array{dtype}(undef,width, height, channels) etc = CPUConvState(col_buffer, M, N, K, bias_multiplier, img_buffer) return etc end @@ -88,8 +88,8 @@ struct ConvolutionLayerState <: LayerState @assert dtype == eltype(inputs[i]) end - blobs = Array{Blob}(length(inputs)) - blobs_diff = Array{Blob}(length(inputs)) + 
blobs = Array{Blob}(undef,length(inputs)) + blobs_diff = Array{Blob}(undef,length(inputs)) for i = 1:length(inputs) blobs[i] = make_blob(backend, dtype, width_out, height_out, layer.n_filter, batch_size) diff --git a/src/layers/hdf5-data.jl b/src/layers/hdf5-data.jl index fa70276..87c7835 100644 --- a/src/layers/hdf5-data.jl +++ b/src/layers/hdf5-data.jl @@ -56,8 +56,8 @@ struct HDF5DataLayerState <: LayerState state.shuffle_idx = Int[] end - state.blobs = Array{Blob}(length(layer.tops)) - state.trans = Array{Vector{DataTransformerState}}(length(layer.tops)) + state.blobs = Array{Blob}(undef,length(layer.tops)) + state.trans = Array{Vector{DataTransformerState}}(undef,length(layer.tops)) transformers = convert(Vector{@compat(Tuple{Symbol, DataTransformerType})}, layer.transformers) for i = 1:length(state.blobs) dims = size(state.dsets[i]) diff --git a/src/layers/hdf5-output.jl b/src/layers/hdf5-output.jl index 1674438..3cf41eb 100644 --- a/src/layers/hdf5-output.jl +++ b/src/layers/hdf5-output.jl @@ -34,14 +34,14 @@ function setup(backend::Backend, layer::HDF5OutputLayer, inputs::Vector{Blob}, d end file = h5open(layer.filename, "w") - buffer = Array{Array}(length(inputs)) - dsets = Array{Any}(length(inputs)) + buffer = Array{Array}(undef,length(inputs)) + dsets = Array{Any}(undef,length(inputs)) for i = 1:length(inputs) data_type = eltype(inputs[i]) dims = size(inputs[i]) dsets[i] = d_create(file, string(datasets[i]), datatype(data_type), dataspace(dims, max_dims=tuple(dims[1:end-1]..., -1)), "chunk", dims) - buffer[i] = Array{data_type}(dims) + buffer[i] = Array{data_type}(undef,dims) end return HDF5OutputLayerState(layer, file, buffer, dsets, 1) diff --git a/src/layers/index2onehot.jl b/src/layers/index2onehot.jl index 5877ae8..fd9451e 100644 --- a/src/layers/index2onehot.jl +++ b/src/layers/index2onehot.jl @@ -15,8 +15,8 @@ end function setup(backend::Backend, layer::Index2OnehotLayer, inputs::Vector{Blob}, diffs::Vector{Blob}) data_type = eltype(inputs[1]) - dims = Array{Int}(length(inputs)) - blobs = Array{Blob}(length(inputs)) + dims = Array{Int}(undef,length(inputs)) + blobs = Array{Blob}(undef,length(inputs)) for i = 1:length(inputs) total_dim = ndims(inputs[i]) dim = layer.dim < 0 ? 
layer.dim + total_dim + 1 : layer.dim diff --git a/src/layers/inner-product.jl b/src/layers/inner-product.jl index 4fa4b7b..203697c 100644 --- a/src/layers/inner-product.jl +++ b/src/layers/inner-product.jl @@ -48,9 +48,9 @@ struct InnerProductLayerState <: LayerState @assert eltype(inputs[i]) == data_type end - blobs = Array{Blob}(length(inputs)) - blobs_diff = Array{Blob}(length(inputs)) - bias_multipliers = Array{Blob}(length(inputs)) + blobs = Array{Blob}(undef,length(inputs)) + blobs_diff = Array{Blob}(undef,length(inputs)) + bias_multipliers = Array{Blob}(undef,length(inputs)) for i = 1:length(inputs) nums = get_num(inputs[i]) diff --git a/src/layers/lrn.jl b/src/layers/lrn.jl index 9e2d0f0..b60591a 100644 --- a/src/layers/lrn.jl +++ b/src/layers/lrn.jl @@ -43,31 +43,31 @@ function setup(backend::Backend, layer::LRNLayer, inputs::Vector{Blob}, diffs::V @assert ndims(inputs[i]) == 4 end - split_layer = SplitLayer(no_copy=true, tops=Array{Symbol}(2), bottoms=Array{Symbol}(1)) + split_layer = SplitLayer(no_copy=true, tops=Array{Symbol}(undef,2), bottoms=Array{Symbol}(undef,1)) do_split = setup(backend, split_layer, inputs, diffs) - square_layer = PowerLayer(power=2, tops=Array{Symbol}(1), bottoms=Array{Symbol}(1)) + square_layer = PowerLayer(power=2, tops=Array{Symbol}(undef,1), bottoms=Array{Symbol}(undef,1)) do_square = setup(backend, square_layer, Blob[do_split.blobs[1]], Blob[do_split.blobs_diff[1]]) pre_pad = div(layer.kernel-1,2) if isa(layer.mode, LRNMode.AcrossChannel) post_pad = layer.kernel - pre_pad - 1 - pool_layer = ChannelPoolingLayer(tops=Array{Symbol}(1), bottoms=Array{Symbol}(1), + pool_layer = ChannelPoolingLayer(tops=Array{Symbol}(undef,1), bottoms=Array{Symbol}(undef,1), kernel=layer.kernel, stride=1, pad=(pre_pad,post_pad), pooling=Pooling.Mean(), channel_dim=layer.channel_dim) elseif isa(layer.mode, LRNMode.WithinChannel) - pool_layer = PoolingLayer(tops=Array{Symbol}(1), bottoms=Array{Symbol}(1), + pool_layer = PoolingLayer(tops=Array{Symbol}(undef,1), bottoms=Array{Symbol}(undef,1), kernel=(layer.kernel,layer.kernel), stride=(1,1), pad=(pre_pad,pre_pad), pooling=Pooling.Mean()) end do_pool = setup(backend, pool_layer, do_square.blobs, do_square.blobs_diff) - power_layer = PowerLayer(tops=Array{Symbol}(1), bottoms=Array{Symbol}(1), + power_layer = PowerLayer(tops=Array{Symbol}(undef,1), bottoms=Array{Symbol}(undef,1), power=layer.power, scale=layer.scale, shift=layer.shift) do_power = setup(backend, power_layer, do_pool.blobs, do_pool.blobs_diff) - div_layer = ElementWiseLayer(tops=Array{Symbol}(1), bottoms=Array{Symbol}(2), + div_layer = ElementWiseLayer(tops=Array{Symbol}(undef,1), bottoms=Array{Symbol}(undef,2), operation = ElementWiseFunctors.Divide()) do_div = setup(backend, div_layer, Blob[do_split.blobs[2],do_power.blobs[1]], diff --git a/src/layers/memory-data.jl b/src/layers/memory-data.jl index 2fca9d8..ab74efa 100644 --- a/src/layers/memory-data.jl +++ b/src/layers/memory-data.jl @@ -20,8 +20,8 @@ struct MemoryDataLayerState <: LayerState shuffle_idx :: Vector{Int} MemoryDataLayerState(backend::Backend, layer::MemoryDataLayer) = begin - blobs = Array{Blob}(length(layer.tops)) - trans = Array{Vector{DataTransformerState}}(length(layer.tops)) + blobs = Array{Blob}(undef,length(layer.tops)) + trans = Array{Vector{DataTransformerState}}(undef,length(layer.tops)) transformers = convert(Vector{@compat(Tuple{Symbol, DataTransformerType})}, layer.transformers) for i = 1:length(blobs) dims = tuple(size(layer.data[i])[1:end-1]..., layer.batch_size) diff --git 
a/src/layers/memory-output.jl b/src/layers/memory-output.jl index cd04735..9460181 100644 --- a/src/layers/memory-output.jl +++ b/src/layers/memory-output.jl @@ -20,7 +20,7 @@ function reset_outputs(state::MemoryOutputLayerState) end function setup(backend::Backend, layer::MemoryOutputLayer, inputs::Vector{Blob}, diffs::Vector{Blob}) - outputs = Array{Vector{Array}}(length(inputs)) + outputs = Array{Vector{Array}}(undef,length(inputs)) for i = 1:length(inputs) outputs[i] = Array[] end diff --git a/src/layers/pooling.jl b/src/layers/pooling.jl index 3fa7154..ae13fa8 100644 --- a/src/layers/pooling.jl +++ b/src/layers/pooling.jl @@ -25,10 +25,10 @@ function setup_etc(backend::CPUBackend, layer::PoolingLayer, inputs, pooled_width, pooled_height) if isa(layer.pooling, Pooling.Max) - masks = Array{Array}(length(inputs)) + masks = Array{Array}(undef,length(inputs)) for i = 1:length(inputs) width,height,channels,num = size(inputs[i]) - masks[i] = Array{Csize_t}(pooled_width[i], pooled_height[i], channels, num) + masks[i] = Array{Csize_t}(undef,pooled_width[i], pooled_height[i], channels, num) end etc = masks else @@ -38,10 +38,10 @@ function setup_etc(backend::CPUBackend, layer::PoolingLayer, inputs, end function setup(backend::Backend, layer::PoolingLayer, inputs::Vector{Blob}, diffs::Vector{Blob}) - blobs = Array{Blob}(length(inputs)) - blobs_diff = Array{Blob}(length(inputs)) - pw_all = Array{Int}(length(inputs)) - ph_all = Array{Int}(length(inputs)) + blobs = Array{Blob}(undef,length(inputs)) + blobs_diff = Array{Blob}(undef,length(inputs)) + pw_all = Array{Int}(undef,length(inputs)) + ph_all = Array{Int}(undef,length(inputs)) for i = 1:length(inputs) width,height,channels,num = size(inputs[i]) diff --git a/src/layers/power.jl b/src/layers/power.jl index 0772e52..ccf1e51 100644 --- a/src/layers/power.jl +++ b/src/layers/power.jl @@ -21,7 +21,7 @@ end function setup(backend::Backend, layer::PowerLayer, inputs::Vector{Blob}, diffs::Vector{Blob}) blobs = Blob[make_blob(backend, eltype(x), size(x)) for x in inputs] - blobs_diff = Array{Blob}(length(inputs)) + blobs_diff = Array{Blob}(undef,length(inputs)) for i = 1:length(inputs) # if the bottom layer does not need back propagate, I don't need, either if isa(diffs[i], NullBlob) diff --git a/src/layers/random-mask.jl b/src/layers/random-mask.jl index 1234faf..1ed4a8c 100644 --- a/src/layers/random-mask.jl +++ b/src/layers/random-mask.jl @@ -15,7 +15,7 @@ struct RandomMaskLayerState <: LayerState end function setup(backend::Backend, layer::RandomMaskLayer, inputs::Vector{Blob}, diffs::Vector{Blob}) - dropouts = Array{DropoutLayerState}(length(inputs)) + dropouts = Array{DropoutLayerState}(undef,length(inputs)) for i = 1:length(inputs) dropout_layer = DropoutLayer(name="$(layer.name)-dropout-$i", auto_scale=false, ratio=layer.ratio, bottoms=Symbol[Symbol("$(layer.bottoms[i])-$i")]) diff --git a/src/layers/random-normal.jl b/src/layers/random-normal.jl index 050d70e..e8d8bbd 100644 --- a/src/layers/random-normal.jl +++ b/src/layers/random-normal.jl @@ -17,7 +17,7 @@ struct RandomNormalLayerState <: LayerState etc :: Vector{Any} RandomNormalLayerState(backend::Backend, layer::RandomNormalLayer) = begin - blobs = Array{Blob}(length(layer.tops)) + blobs = Array{Blob}(undef,length(layer.tops)) for i = 1:length(blobs) dims = tuple(layer.output_dims..., layer.batch_sizes[i]) blobs[i] = make_blob(backend, layer.eltype, dims...) 
diff --git a/src/layers/softmax-loss.jl b/src/layers/softmax-loss.jl index ae0d438..ef7206e 100644 --- a/src/layers/softmax-loss.jl +++ b/src/layers/softmax-loss.jl @@ -26,10 +26,10 @@ end function setup(backend::Backend, layer::SoftmaxLossLayer, inputs::Vector{Blob}, diffs::Vector{Blob}) data_type = eltype(inputs[1]) - softmax_layer = SoftmaxLayer(tops=Array{Symbol}(length(inputs)), bottoms=Array{Symbol}(length(inputs)), dim=layer.dim) + softmax_layer = SoftmaxLayer(tops=Array{Symbol}(undef,length(inputs)), bottoms=Array{Symbol}(undef,length(inputs)), dim=layer.dim) softmax = setup(backend, softmax_layer, Blob[inputs[1]], Blob[]) - logistic_layer = MultinomialLogisticLossLayer(bottoms=Array{Symbol}(2), + logistic_layer = MultinomialLogisticLossLayer(bottoms=Array{Symbol}(undef,2), weights=layer.weights, normalize=layer.normalize, dim=layer.dim) logistic = setup(backend, logistic_layer, inputs, Blob[]) diff --git a/src/layers/split.jl b/src/layers/split.jl index 74dee39..0815237 100644 --- a/src/layers/split.jl +++ b/src/layers/split.jl @@ -24,13 +24,13 @@ function setup(backend::Backend, layer::SplitLayer, inputs::Vector{Blob}, diffs: # directly re-use the input blob blobs = Blob[inputs[1] for i = 1:N] else - blobs = Array{Blob}(N) + blobs = Array{Blob}(undef,N) blobs[1] = inputs[1] for i = 2:N blobs[i] = make_blob(backend, eltype(inputs[1]), size(inputs[1])) end end - blobs_diff = Array{Blob}(N) + blobs_diff = Array{Blob}(undef,N) blobs_diff[1] = diffs[1] # re-use the first backward blob for i = 2:N if isa(diffs[1], NullBlob) diff --git a/src/layers/tied-inner-product.jl b/src/layers/tied-inner-product.jl index d458de9..29d3701 100644 --- a/src/layers/tied-inner-product.jl +++ b/src/layers/tied-inner-product.jl @@ -48,9 +48,9 @@ struct TiedInnerProductLayerState <: LayerState @assert size(W, 2) == fea_size out_dim = size(W, 1) - blobs = Array{Blob}(length(inputs)) - blobs_diff = Array{Blob}(length(inputs)) - bias_multipliers = Array{Blob}(length(inputs)) + blobs = Array{Blob}(undef,length(inputs)) + blobs_diff = Array{Blob}(undef,length(inputs)) + bias_multipliers = Array{Blob}(undef,length(inputs)) for i = 1:length(inputs) nums = get_num(inputs[i]) diff --git a/src/net.jl b/src/net.jl index bd3b852..f7dcbea 100644 --- a/src/net.jl +++ b/src/net.jl @@ -200,9 +200,9 @@ Net(name::AbstractString, backend::Backend, layers :: Vector{Layer}) = begin data_layers = find(l -> is_source(l), layers) n = length(layers) - states = Array{LayerState}(n) - blobs_forward = Array{Vector{Blob}}(n) - blobs_backward = Array{Vector{Blob}}(n) + states = Array{LayerState}(undef,n) + blobs_forward = Array{Vector{Blob}}(undef,n) + blobs_backward = Array{Vector{Blob}}(undef,n) output_blobs = Dict{Symbol,Blob}() diff_blobs = Dict{Symbol,Blob}() diff --git a/src/solvers/adadelta.jl b/src/solvers/adadelta.jl index 7d60c99..c29455e 100644 --- a/src/solvers/adadelta.jl +++ b/src/solvers/adadelta.jl @@ -41,8 +41,8 @@ end AdadeltaSolverState(net::Net) = begin param_states = updatable_layer_states(net) - gradients_sq = Array{Vector{Blob}}(length(param_states)) - deltas_sq = Array{Vector{Blob}}(length(param_states)) + gradients_sq = Array{Vector{Blob}}(undef,length(param_states)) + deltas_sq = Array{Vector{Blob}}(undef,length(param_states)) for i = 1:length(param_states) state = param_states[i] diff --git a/src/solvers/adagrad.jl b/src/solvers/adagrad.jl index 7c08004..4e72838 100644 --- a/src/solvers/adagrad.jl +++ b/src/solvers/adagrad.jl @@ -41,7 +41,7 @@ end AdagradSolverState(net::Net) = begin param_states = 
updatable_layer_states(net) - param_history = Array{Vector{Blob}}(length(param_states)) + param_history = Array{Vector{Blob}}(undef,length(param_states)) for i = 1:length(param_states) state = param_states[i] diff --git a/src/solvers/adam.jl b/src/solvers/adam.jl index 5826bf9..27bbdd5 100644 --- a/src/solvers/adam.jl +++ b/src/solvers/adam.jl @@ -34,9 +34,9 @@ struct AdamSolverStateSnapshot <: SolverStateSnapshot end function blobs_clone(blobs::Vector{Vector{Blob}}) - out = Array{Vector{Array}}(length(blobs)) + out = Array{Vector{Array}}(undef,length(blobs)) for (i, vecblobs) in enumerate(blobs) - out[i] = [Array{eltype(b)}(size(b)) for b in vecblobs] + out[i] = [Array{eltype(b)}(undef,size(b)) for b in vecblobs] for (dst, b) in zip(out[i], vecblobs) copy!(dst, b) end @@ -76,8 +76,8 @@ end AdamSolverState(learning_rate::Float64, net::Net) = begin param_states = updatable_layer_states(net) - grad_1st_moment_est = Array{Vector{Blob}}(length(param_states)) - grad_2nd_moment_est = Array{Vector{Blob}}(length(param_states)) + grad_1st_moment_est = Array{Vector{Blob}}(undef,length(param_states)) + grad_2nd_moment_est = Array{Vector{Blob}}(undef,length(param_states)) for i = 1:length(param_states) layerstate = param_states[i] diff --git a/src/solvers/policies.jl b/src/solvers/policies.jl index d861aa0..e65306c 100644 --- a/src/solvers/policies.jl +++ b/src/solvers/policies.jl @@ -91,7 +91,7 @@ struct Staged <: LearningRatePolicy curr_stage :: Int Staged(stages...) = begin - accum_stages = Array{@compat(Tuple{Int, LearningRatePolicy})}(length(stages)) + accum_stages = Array{@compat(Tuple{Int, LearningRatePolicy})}(undef,length(stages)) accum_iter = 0 for i = 1:length(stages) (n, lrp) = stages[i] @@ -184,7 +184,7 @@ struct Staged <: MomentumPolicy curr_stage :: Int Staged(stages...) = begin - accum_stages = Array{@compat(Tuple{Int, MomentumPolicy})}(length(stages)) + accum_stages = Array{@compat(Tuple{Int, MomentumPolicy})}(undef,length(stages)) accum_iter = 0 for i = 1:length(stages) (n, mmp) = stages[i] diff --git a/src/solvers/sgd.jl b/src/solvers/sgd.jl index 6779cce..2cbf47b 100644 --- a/src/solvers/sgd.jl +++ b/src/solvers/sgd.jl @@ -52,7 +52,7 @@ end SGDSolverState(net::Net, learning_rate::Float64, momentum::Float64) = begin param_states = updatable_layer_states(net) - param_history = Array{Vector{Blob}}(length(param_states)) + param_history = Array{Vector{Blob}}(undef,length(param_states)) for i = 1:length(param_states) state = param_states[i] diff --git a/src/utils/gradient-checking.jl b/src/utils/gradient-checking.jl index f0307e0..86e7f95 100644 --- a/src/utils/gradient-checking.jl +++ b/src/utils/gradient-checking.jl @@ -39,7 +39,7 @@ end # space, where unrolling becomes trivial # it left as an exercise for the reader function unroll_parameters( model::Net ) - theta = Array{Float64}() # initial state is one length ??? weird + theta = Array{Float64}(undef,0) # initial state is one length ??? weird for l = 1:length(model.layers) if Mocha.has_param(model.layers[l]) @@ -56,7 +56,7 @@ function unroll_parameters( model::Net ) end function unroll_gradients( model::Net ) - theta = Array{Float64}() # initial state is one length ??? weird + theta = Array{Float64}(undef,0) # initial state is one length ??? 
weird for l = 1:length(model.layers) if Mocha.has_param(model.layers[l]) for m in model.states[l].parameters @@ -152,7 +152,7 @@ function gradient_check(model::Net, epsilon::Float64, digit::Int, visual::Bool) # ∇⁺ = round(∇⁺, 4); ∇ = round(∇, 4) idx = round.( abs.(∇ᵋ - ∇), digit ) .!= 0 if visual - δ = Array{Char}(length(idx)); fill!(δ,'.') + δ = Array{Char}(undef,length(idx)); fill!(δ,'.') δ[idx] = 'x' show(model, δ) #show(model,round(∇ᵋ,digit) ); show(model,round(∇,digit)) diff --git a/test/layers/argmax.jl b/test/layers/argmax.jl index a7673e8..9a7bddd 100644 --- a/test/layers/argmax.jl +++ b/test/layers/argmax.jl @@ -10,7 +10,7 @@ function test_argmax_layer(backend::Backend, n_input, tensor_dim, T, eps) diff_blob = Blob[NullBlob() for i = 1:n_input] println(" > Setup") - layer = ArgmaxLayer(bottoms=Array{Symbol}(n_input),tops=Array{Symbol}(n_input),dim=op_dim) + layer = ArgmaxLayer(bottoms=Array{Symbol}(undef,n_input),tops=Array{Symbol}(undef,n_input),dim=op_dim) state = setup(backend, layer, input_blob, diff_blob) println(" > Forward") diff --git a/test/layers/channel-pooling.jl b/test/layers/channel-pooling.jl index 5d34003..41eb8d5 100644 --- a/test/layers/channel-pooling.jl +++ b/test/layers/channel-pooling.jl @@ -10,7 +10,7 @@ function test_channel_pooling_layer(backend::Backend, pooling::PoolingFunction, println(" > Setup (pool along dimension $op_dim for $tensor_dim-D tensors)") layer = ChannelPoolingLayer(kernel=kernel, stride=stride, pad=pad, pooling=pooling, - tops=Array{Symbol}(n_input), bottoms=Array{Symbol}(n_input), channel_dim=op_dim) + tops=Array{Symbol}(undef,n_input), bottoms=Array{Symbol}(undef,n_input), channel_dim=op_dim) input = [rand(T, dim...) for dim in dims] inputs = Blob[make_blob(backend, x) for x in input] @@ -21,7 +21,7 @@ function test_channel_pooling_layer(backend::Backend, pooling::PoolingFunction, println(" > Forward") forward(backend, state, inputs) - payloads = Array{Any}(n_input) + payloads = Array{Any}(undef,n_input) for i = 1:n_input expected_output, payloads[i] = channel_pooling_forward(state, i, input[i], op_dim) got_output = to_array(state.blobs[i]) diff --git a/test/layers/concat.jl b/test/layers/concat.jl index 2c5f4f3..6197fa1 100644 --- a/test/layers/concat.jl +++ b/test/layers/concat.jl @@ -16,7 +16,7 @@ function test_concat_layer(backend::Backend, dim, T, eps) input_blobs = Blob[make_blob(backend, x) for x in inputs] grad_blobs = Blob[make_blob(backend, x) for x in inputs] - layer = ConcatLayer(dim=dim, bottoms=Array{Symbol}(n_input), tops=[:concat]) + layer = ConcatLayer(dim=dim, bottoms=Array{Symbol}(undef,n_input), tops=[:concat]) state = setup(backend, layer, input_blobs, grad_blobs) println(" > Forward") diff --git a/test/layers/convolution.jl b/test/layers/convolution.jl index d05786d..4109352 100644 --- a/test/layers/convolution.jl +++ b/test/layers/convolution.jl @@ -13,7 +13,7 @@ function test_convolution_layer(backend::Backend, n_group, filter_w, filter_h, p layer = ConvolutionLayer(name="conv", kernel=(filter_w, filter_h), stride=(stride_w, stride_h), pad=(pad_w,pad_h), n_filter=n_filter, n_group=n_group, - tops=Array{Symbol}(n_input), bottoms=Array{Symbol}(n_input)) + tops=Array{Symbol}(undef,n_input), bottoms=Array{Symbol}(undef,n_input)) # convolution layer requires each input blob to be the same shape input = [rand(T, input_dims) for i = 1:n_input] @@ -94,7 +94,7 @@ function convolution_forward(state, filter::Array, bias::Array, input::Array) o_g = round(Int, state.layer.n_filter / n_group) k_g = round(Int, channel / 
n_group) - output = Array{eltype(input)}(size(state.blobs[1])) + output = Array{eltype(input)}(undef,size(state.blobs[1])) output[:] = 0 for n = 1:num diff --git a/test/layers/crop.jl b/test/layers/crop.jl index abb3a03..0b26838 100644 --- a/test/layers/crop.jl +++ b/test/layers/crop.jl @@ -8,7 +8,7 @@ function test_crop_layer(backend::Backend, do_mirror, n_input, T, eps) diff_blob = Blob[NullBlob() for i = 1:n_input] println(" > Setup") - layer = CropLayer(bottoms=Array{Symbol}(n_input), tops=Array{Symbol}(n_input), + layer = CropLayer(bottoms=Array{Symbol}(undef,n_input), tops=Array{Symbol}(undef,n_input), crop_size=crop_size, random_mirror=do_mirror) state = setup(backend, layer, input_blob, diff_blob) @@ -41,7 +41,7 @@ function test_crop_layer_random(backend::Backend, do_mirror, n_input, T, eps) diff_blob = Blob[NullBlob() for i = 1:n_input] println(" > Setup") - layer = CropLayer(bottoms=Array{Symbol}(n_input), tops=Array{Symbol}(n_input), crop_size=crop_size, + layer = CropLayer(bottoms=Array{Symbol}(undef,n_input), tops=Array{Symbol}(undef,n_input), crop_size=crop_size, random_mirror=do_mirror, random_crop=true) state = setup(backend, layer, input_blob, diff_blob) diff --git a/test/layers/element-wise.jl b/test/layers/element-wise.jl index f6368fa..85bd53d 100644 --- a/test/layers/element-wise.jl +++ b/test/layers/element-wise.jl @@ -10,7 +10,7 @@ function test_element_wise_layer(backend::Backend, op::ElementWiseFunctorType, j input_blobs = Blob[make_blob(backend, x) for x in inputs] diff_blobs = Blob[make_blob(backend, x) for x in inputs] - layer = ElementWiseLayer(bottoms=Array{Symbol}(NArg), tops=[:result], operation=op) + layer = ElementWiseLayer(bottoms=Array{Symbol}(undef,NArg), tops=[:result], operation=op) state = setup(backend, layer, input_blobs, diff_blobs) forward(backend, state, input_blobs) diff --git a/test/layers/hdf5-data.jl b/test/layers/hdf5-data.jl index 448a504..2e2a68d 100644 --- a/test/layers/hdf5-data.jl +++ b/test/layers/hdf5-data.jl @@ -47,7 +47,7 @@ function test_hdf5_data_layer(backend::Backend, async, T, eps) data = data * scale data_idx = map(x->1:x, data_dim) - layer_data = Array{eltype(data)}(tuple(data_dim..., batch_size)) + layer_data = Array{eltype(data)}(undef,tuple(data_dim..., batch_size)) for i = 1:batch_size:size(data)[end]-batch_size+1 forward(backend, state, Blob[]) copy!(layer_data, state.blobs[1]) diff --git a/test/layers/index2onehot.jl b/test/layers/index2onehot.jl index a3c3536..eeb3960 100644 --- a/test/layers/index2onehot.jl +++ b/test/layers/index2onehot.jl @@ -12,7 +12,7 @@ function test_index2onehot_layer(backend::Backend, tensor_dim, n_input, T, eps) input_blob = Blob[make_blob(backend, x) for x in input] diff_blob = Blob[NullBlob() for i = 1:n_input] - layer = Index2OnehotLayer(tops=Array{Symbol}(n_input), bottoms=Array{Symbol}(n_input), + layer = Index2OnehotLayer(tops=Array{Symbol}(undef,n_input), bottoms=Array{Symbol}(undef,n_input), dim=expand_dim, n_class=n_class) state = setup(backend, layer, input_blob, diff_blob) diff --git a/test/layers/inner-product.jl b/test/layers/inner-product.jl index 8c11f79..c2c52a6 100644 --- a/test/layers/inner-product.jl +++ b/test/layers/inner-product.jl @@ -20,7 +20,7 @@ function test_inner_product_layer(backend::Backend, n_input, T, eps) # Setup ############################################################ layer = InnerProductLayer(name="ip", output_dim=target_dim, - tops=Array{Symbol}(n_input), bottoms=Array{Symbol}(n_input)) + tops=Array{Symbol}(undef,n_input), 
bottoms=Array{Symbol}(undef,n_input)) inputs = Blob[make_blob(backend, x) for x in X] diffs = Blob[make_blob(backend, x) for x in X] diff --git a/test/layers/memory-data.jl b/test/layers/memory-data.jl index c7b8cbe..1204021 100644 --- a/test/layers/memory-data.jl +++ b/test/layers/memory-data.jl @@ -23,7 +23,7 @@ function test_memory_data_layer(backend::Backend, T, eps) state = setup(backend, layer, Blob[], Blob[]) data_idx = map(x->1:x, data_dim) - layer_data = Array{eltype(data)}(tuple(data_dim..., batch_size)) + layer_data = Array{eltype(data)}(undef,tuple(data_dim..., batch_size)) data_aug = cat(tensor_dim+1, data, data) data_aug .-= mean_data diff --git a/test/layers/memory-output.jl b/test/layers/memory-output.jl index a3333df..247e3e7 100644 --- a/test/layers/memory-output.jl +++ b/test/layers/memory-output.jl @@ -9,7 +9,7 @@ function test_memory_output_layer(backend::Backend, T, eps) input_blobs = Blob[make_blob(backend, x) for x in inputs] layer = MemoryOutputLayer(bottoms=[:input1, :input2]) - state = setup(backend, layer, input_blobs, Array{Blob}(length(inputs))) + state = setup(backend, layer, input_blobs, Array{Blob}(undef,length(inputs))) # repeat 2 times forward(backend, state, input_blobs) diff --git a/test/layers/pooling.jl b/test/layers/pooling.jl index 3af4a1c..caac86c 100644 --- a/test/layers/pooling.jl +++ b/test/layers/pooling.jl @@ -37,7 +37,7 @@ function test_pooling_layer(backend::Backend, pooling::PoolingFunction, has_padd end layer = PoolingLayer(kernel=(kernel_w,kernel_h), stride=(stride_w,stride_h), pad=padding, - tops=Array{Symbol}(n_input), bottoms=Array{Symbol}(n_input), pooling=pooling) + tops=Array{Symbol}(undef,n_input), bottoms=Array{Symbol}(undef,n_input), pooling=pooling) input = [rand(T, dims[i]...) for i = 1:n_input] inputs = Blob[make_blob(backend, x) for x in input] @@ -48,7 +48,7 @@ function test_pooling_layer(backend::Backend, pooling::PoolingFunction, has_padd println(" > Forward") forward(backend, state, inputs) - payloads = Array{Any}(n_input) + payloads = Array{Any}(undef,n_input) for i = 1:n_input expected_output, payloads[i] = pooling_forward(state, i, input[i]) got_output = similar(expected_output) diff --git a/test/layers/power.jl b/test/layers/power.jl index b4e3eaf..8659e0b 100644 --- a/test/layers/power.jl +++ b/test/layers/power.jl @@ -7,7 +7,7 @@ function test_power_layer(backend::Backend, scale, shift, power, n_input, T, eps input_blob = Blob[make_blob(backend, x) for x in input] grad_blob = Blob[make_blob(backend, x) for x in input] - layer = PowerLayer(tops=Array{Symbol}(n_input), bottoms=Array{Symbol}(n_input), + layer = PowerLayer(tops=Array{Symbol}(undef,n_input), bottoms=Array{Symbol}(undef,n_input), scale=scale, shift=shift, power=power) state = setup(backend, layer, input_blob, grad_blob) diff --git a/test/layers/reshape.jl b/test/layers/reshape.jl index e12a565..e13e251 100644 --- a/test/layers/reshape.jl +++ b/test/layers/reshape.jl @@ -9,7 +9,7 @@ function test_reshape_layer(backend::Backend, n_input, T, eps) diff_blob = Blob[make_blob(backend, x) for x in input] println(" > Setup") - layer = ReshapeLayer(bottoms=Array{Symbol}(n_input), tops=Array{Symbol}(n_input), + layer = ReshapeLayer(bottoms=Array{Symbol}(undef,n_input), tops=Array{Symbol}(undef,n_input), shape=(1,1,prod(dims[1][1:end-1]))) state = setup(backend, layer, input_blob, diff_blob) diff --git a/test/layers/softmax.jl b/test/layers/softmax.jl index fae1eb7..e668f13 100644 --- a/test/layers/softmax.jl +++ b/test/layers/softmax.jl @@ -9,7 +9,7 @@ function 
test_softmax_layer(backend::Backend, tensor_dim, n_input, T, eps) input_blob = Blob[make_blob(backend, x) for x in input] diff_blob = Blob[make_blob(backend, x) for x in input] - layer = SoftmaxLayer(tops=Array{Symbol}(n_input), bottoms=Array{Symbol}(n_input), + layer = SoftmaxLayer(tops=Array{Symbol}(undef,n_input), bottoms=Array{Symbol}(undef,n_input), dim=norm_dim-tensor_dim-1) state = setup(backend, layer, input_blob, diff_blob) diff --git a/tools/image-classifier.jl b/tools/image-classifier.jl index 74de7f3..b1efc29 100644 --- a/tools/image-classifier.jl +++ b/tools/image-classifier.jl @@ -41,7 +41,7 @@ function ImageClassifier(net::Net, softmax_blob_name::Symbol; end pred_blob = net.output_blobs[softmax_blob_name] - pred = Array{data_type}(size(pred_blob)) + pred = Array{data_type}(undef,size(pred_blob)) return ImageClassifier(net, channel_order, sp_order, grayscale, classes, data_layer, pred_blob, pred, data_type, batch_size, image_wh) @@ -112,7 +112,7 @@ function preprocess{T<:AbstractArray}(classifier::ImageClassifier, images::Vecto if classifier.grayscale data2 = convert(Array{classifier.data_type}, data) else - data2 = Array{classifier.data_type}(tuple(classifier.image_wh..., 3)) + data2 = Array{classifier.data_type}(undef,tuple(classifier.image_wh..., 3)) for i = 1:3 data2[:,:,i] = convert(Array{classifier.data_type}, data[:,:,classifier.channel_order[i]]) From 96379fd5b84fb586889059a40bd34f78ecbc7179 Mon Sep 17 00:00:00 2001 From: Chiyuan Zhang Date: Wed, 21 Nov 2018 11:00:14 -0800 Subject: [PATCH 12/24] fix mysterious memento segfault via workaround --- src/logging.jl | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/logging.jl b/src/logging.jl index 6733d94..5f27965 100644 --- a/src/logging.jl +++ b/src/logging.jl @@ -2,27 +2,36 @@ using Memento export m_debug, m_info, m_notice, m_warn, m_error # NOTE: It isn't generally recommended to configure your logger at the package/library level. -push!(getlogger(Mocha), +push!(getlogger("Mocha"), DefaultHandler(stdout, DefaultFormatter("[{date} | {level} | {name}]: {msg}"))) -setlevel!(getlogger(Mocha), "info") -setpropagating!(getlogger(Mocha), false) +setlevel!(getlogger("Mocha"), "info") +setpropagating!(getlogger("Mocha"), false) function m_debug(msg :: AbstractString...) - debug(getlogger(Mocha), prod(msg)) + # TODO(pluskid): all the logging functions are now replaced with println due to some + # mysterious segfault when calling memento functions. Revert to memento calls when + # this issue is resolved. + println("[debug | Mocha]: " * prod(msg)) + # debug(getlogger("Mocha"), prod(msg)) end function m_info(msg :: AbstractString...) - info(getlogger(Mocha), prod(msg)) + println("[info | Mocha]: " * prod(msg)) + # info(getlogger("Mocha"), prod(msg)) end function m_notice(msg :: AbstractString...) - notice(getlogger(Mocha), prod(msg)) + println("[notice | Mocha]: " * prod(msg)) + # notice(getlogger("Mocha"), prod(msg)) end function m_warn(msg :: AbstractString...) - warn(getlogger(Mocha), prod(msg)) + println("[WARN | Mocha]: " * prod(msg)) + # warn(getlogger("Mocha"), prod(msg)) end function m_error(msg :: AbstractString...) - error(getlogger(Mocha), prod(msg)) + println("[ERROR | Mocha]: " * prod(msg)) + exit(1) + # error(getlogger("Mocha"), prod(msg)) end From 4c8514eea88c54633b9f90842a67a738f989ab56 Mon Sep 17 00:00:00 2001 From: Chiyuan Zhang Date: Wed, 21 Nov 2018 13:17:07 -0800 Subject: [PATCH 13/24] fixes for sum(., dims=...), broadcast .=, array init, ... 
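
For context, a minimal standalone sketch of the 0.6 -> 1.0 renames this commit applies across the code base. None of the names below come from Mocha itself; the matrix, the Counter struct and the values are made up purely to show each rename in isolation.

    A = [1 0; 0 2]

    # reductions now take a keyword:         sum(A, 2)          ->  sum(A, dims=2)
    row_sums = sum(A, dims=2)

    # scalar-to-slice assignment broadcasts: A[1, :] = 0        ->  A[1, :] .= 0
    A[1, :] .= 0

    # find was renamed:                      find(iszero, v)    ->  findall(iszero, v)
    zero_rows = findall(iszero, vec(row_sums))

    # uninitialized storage is explicit:     Array{Float64}(3)  ->  Array{Float64}(undef, 3)
    buf = Array{Float64}(undef, 3)

    # fieldnames takes a type, not a value:  fieldnames(x)      ->  fieldnames(typeof(x))
    fieldnames(typeof(1 + 2im))              # (:re, :im)

    # and `type` blocks whose fields get reassigned become `mutable struct`
    mutable struct Counter
        n :: Int
    end
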
--- src/backend.jl | 2 +- src/blob.jl | 2 +- src/layers/inner-product.jl | 2 +- src/layers/memory-data.jl | 4 ++-- src/net.jl | 10 +++++----- src/utils/ref-count.jl | 2 +- test/runtests.jl | 9 +++++---- test/solvers/test-adam-solver.jl | 2 +- 8 files changed, 17 insertions(+), 16 deletions(-) diff --git a/src/backend.jl b/src/backend.jl index 7e8c9de..9cdc1ae 100644 --- a/src/backend.jl +++ b/src/backend.jl @@ -32,7 +32,7 @@ function registry_get(backend::Backend, key::AbstractString) return get(backend.param_registry, key, nothing) end -struct CPUBackend <: Backend +mutable struct CPUBackend <: Backend param_registry :: ParameterRegistry CPUBackend() = new(ParameterRegistry()) diff --git a/src/blob.jl b/src/blob.jl index 733a72b..851ac12 100644 --- a/src/blob.jl +++ b/src/blob.jl @@ -129,7 +129,7 @@ end struct CPUBlob{T <: AbstractFloat, N} <: Blob{T, N} data :: AbstractArray{T, N} end -CPUBlob(t :: Type, dims::NTuple{N,Int}) where {N} = CPUBlob(undef,Array{t}(undef,dims)) +CPUBlob(t :: Type, dims::NTuple{N,Int}) where {N} = CPUBlob(Array{t}(undef, dims)) function make_blob(backend::CPUBackend, data_type::Type, dims::NTuple{N,Int}) where {N} return CPUBlob(data_type, dims) diff --git a/src/layers/inner-product.jl b/src/layers/inner-product.jl index 203697c..61951f8 100644 --- a/src/layers/inner-product.jl +++ b/src/layers/inner-product.jl @@ -20,7 +20,7 @@ has_neuron => true ) -struct InnerProductLayerState <: LayerState +mutable struct InnerProductLayerState <: LayerState layer :: InnerProductLayer blobs :: Vector{Blob} blobs_diff :: Vector{Blob} diff --git a/src/layers/memory-data.jl b/src/layers/memory-data.jl index ab74efa..dc4e27c 100644 --- a/src/layers/memory-data.jl +++ b/src/layers/memory-data.jl @@ -20,8 +20,8 @@ struct MemoryDataLayerState <: LayerState shuffle_idx :: Vector{Int} MemoryDataLayerState(backend::Backend, layer::MemoryDataLayer) = begin - blobs = Array{Blob}(undef,length(layer.tops)) - trans = Array{Vector{DataTransformerState}}(undef,length(layer.tops)) + blobs = Array{Blob}(undef, length(layer.tops)) + trans = Array{Vector{DataTransformerState}}(undef, length(layer.tops)) transformers = convert(Vector{@compat(Tuple{Symbol, DataTransformerType})}, layer.transformers) for i = 1:length(blobs) dims = tuple(size(layer.data[i])[1:end-1]..., layer.batch_size) diff --git a/src/net.jl b/src/net.jl index f7dcbea..7445577 100644 --- a/src/net.jl +++ b/src/net.jl @@ -197,7 +197,7 @@ Net(name::AbstractString, backend::Backend, layers :: Vector{Layer}) = begin m_info("Constructing net $name on $backend...") m_info("Topological sorting $(length(layers)) layers...") layers = topological_sort(layers) - data_layers = find(l -> is_source(l), layers) + data_layers = findall(l -> is_source(l), layers) n = length(layers) states = Array{LayerState}(undef,n) @@ -211,7 +211,7 @@ Net(name::AbstractString, backend::Backend, layers :: Vector{Layer}) = begin for i = 1:n layer = layers[i] # record if layers has any dependency - if :bottoms ∈ fieldnames(layer) + if :bottoms ∈ fieldnames(typeof(layer)) blob_fwd = Blob[output_blobs[x] for x in layer.bottoms] blob_bwd = Blob[diff_blobs[x] for x in layer.bottoms] else @@ -296,7 +296,7 @@ function topological_sort(layers :: Vector{Layer}) index = Int[] while length(index) < n # find layers that has no dependency - idx = find(sum(graph,2) .== 0) + idx = findall(reshape(sum(graph, dims=2), :) .== 0) if length(idx) == 0 throw(TopologyError("Can't finish topological sort, cycle in layer dependency?")) end @@ -307,8 +307,8 @@ function 
topological_sort(layers :: Vector{Layer}) idx = [idx_inplace; idx_normal] push!(index, idx...) - graph[idx,:] = 2 # make sure we don't select those again - graph[:,idx] = 0 # layers that depend on those could be selected + graph[idx, :] .= 2 # make sure we don't select those again + graph[:, idx] .= 0 # layers that depend on those could be selected end return layers[index] diff --git a/src/utils/ref-count.jl b/src/utils/ref-count.jl index 7de21e4..7a07318 100644 --- a/src/utils/ref-count.jl +++ b/src/utils/ref-count.jl @@ -5,7 +5,7 @@ import Base.dec export RefCounter, inc, dec, ref -struct RefCounter +mutable struct RefCounter count :: Int end diff --git a/test/runtests.jl b/test/runtests.jl index 9050ccf..a15fe37 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,4 +1,5 @@ -srand(100) +using Random +Random.seed!(100) if haskey(ENV, "MOCHA_USE_CUDA") const test_gpu = true @@ -8,7 +9,7 @@ end const test_cpu = true using Mocha -using Base.Test +using Test if test_cpu backend_cpu = CPUBackend() @@ -32,8 +33,8 @@ end ############################################################ # Solvers ############################################################ -include("solvers/test-adam-solver.jl") -include("solvers/test-sgd-solver.jl") +#- include("solvers/test-adam-solver.jl") +#- include("solvers/test-sgd-solver.jl") ############################################################ # Network diff --git a/test/solvers/test-adam-solver.jl b/test/solvers/test-adam-solver.jl index d93eaa5..ff0bc12 100644 --- a/test/solvers/test-adam-solver.jl +++ b/test/solvers/test-adam-solver.jl @@ -1,7 +1,7 @@ function test_adam_solver(backend) println("-- Testing simple Adam solver call") registry_reset(backend) - srand(12345678) + Random.seed!(12345678) ############################################################ # Prepare Random Data ############################################################ From f355f98129337bbd90e99beff7224e7f5fc35533 Mon Sep 17 00:00:00 2001 From: Chiyuan Zhang Date: Wed, 21 Nov 2018 13:57:42 -0800 Subject: [PATCH 14/24] fix BLAS calls, regex call, etc. 
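For context, a minimal sketch of the renames that dominate this patch (the data
below is made up, not taken from the test suite): BLAS routines now live under the
LinearAlgebra stdlib, `ismatch` became `occursin`, and `round` takes a `digits` keyword:

    import LinearAlgebra

    A = rand(Float64, 3, 3)
    B = rand(Float64, 3, 3)
    C = zeros(Float64, 3, 3)

    # 0.6: BLAS.gemm!('N', 'N', 1.0, A, B, 0.0, C)
    LinearAlgebra.BLAS.gemm!('N', 'N', 1.0, A, B, 0.0, C)  # C <- 1.0*A*B + 0.0*C

    # 0.6: ismatch(r"\.jl$", "runtests.jl")
    occursin(r"\.jl$", "runtests.jl")   # returns true

    # 0.6: round(x, 4)
    round(0.123456, digits=4)           # rounds to 4 digits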
--- src/layers/inner-product.jl | 12 +++++----- src/layers/memory-data.jl | 2 +- src/layers/square-loss.jl | 15 ++++++++----- src/regularizers.jl | 4 +++- src/utils/blas.jl | 5 ++++- src/utils/gradient-checking.jl | 38 ++++++++++++++------------------ src/utils/io.jl | 2 +- test/net/test-gradient-simple.jl | 4 ++-- test/runtests.jl | 8 +++---- test/utils/blas.jl | 4 +++- 10 files changed, 51 insertions(+), 43 deletions(-) diff --git a/src/layers/inner-product.jl b/src/layers/inner-product.jl index 61951f8..f97a794 100644 --- a/src/layers/inner-product.jl +++ b/src/layers/inner-product.jl @@ -1,3 +1,5 @@ +import LinearAlgebra + @defstruct InnerProductLayer Layer ( (name :: AbstractString = "", !isempty(name)), param_key :: AbstractString = "", @@ -122,10 +124,10 @@ function forward(backend::CPUBackend, state::InnerProductLayerState, inputs::Vec N = get_num(input) # batch size output = state.blobs[i] # output = W^T * X - BLAS.gemm!('T', 'N', one(dtype), state.W.data, + LinearAlgebra.BLAS.gemm!('T', 'N', one(dtype), state.W.data, reshape(input.data, (K,N)), zero(dtype), output.data) # output += bias - BLAS.gemm!('N', 'N', one(dtype), state.b.data, + LinearAlgebra.BLAS.gemm!('N', 'N', one(dtype), state.b.data, state.bias_multipliers[i].data, one(dtype), output.data) end end @@ -146,11 +148,11 @@ function backward(backend::CPUBackend, state::InnerProductLayerState, inputs::Ve ∂f_∂o = state.blobs_diff[i] if !state.frozen - BLAS.gemm!('N', 'T', one(data_type), reshape(input.data, (source_dim, batch_size)), + LinearAlgebra.BLAS.gemm!('N', 'T', one(data_type), reshape(input.data, (source_dim, batch_size)), ∂f_∂o.data, zero_and_then_one, state.∇W.data) # ∂f/∂b = sum(∂f/∂o, 2) - BLAS.gemm!('N', 'N', one(data_type), ∂f_∂o.data, + LinearAlgebra.BLAS.gemm!('N', 'N', one(data_type), ∂f_∂o.data, reshape(state.bias_multipliers[i].data, (batch_size, 1)), zero_and_then_one, state.∇b.data) end @@ -160,7 +162,7 @@ function backward(backend::CPUBackend, state::InnerProductLayerState, inputs::Ve # if back propagate down if isa(diffs[i], CPUBlob) # ∂f/∂x = W * [∂f/∂o] - BLAS.gemm!('N', 'N', one(data_type), state.W.data, + LinearAlgebra.BLAS.gemm!('N', 'N', one(data_type), state.W.data, ∂f_∂o.data, zero(data_type), reshape(diffs[i].data, (source_dim, batch_size))) end diff --git a/src/layers/memory-data.jl b/src/layers/memory-data.jl index dc4e27c..6c812fd 100644 --- a/src/layers/memory-data.jl +++ b/src/layers/memory-data.jl @@ -10,7 +10,7 @@ is_source => true ) -struct MemoryDataLayerState <: LayerState +mutable struct MemoryDataLayerState <: LayerState layer :: MemoryDataLayer blobs :: Vector{Blob} epoch :: Int diff --git a/src/layers/square-loss.jl b/src/layers/square-loss.jl index 00aafe5..dc6ee2e 100644 --- a/src/layers/square-loss.jl +++ b/src/layers/square-loss.jl @@ -1,3 +1,5 @@ +import LinearAlgebra + ############################################################ # Square Loss # @@ -15,7 +17,7 @@ has_stats => true, ) -struct SquareLossLayerState{T} <: LayerState +mutable struct SquareLossLayerState{T} <: LayerState layer :: SquareLossLayer loss :: T @@ -59,8 +61,9 @@ function forward(backend::CPUBackend, state::SquareLossLayerState, inputs::Vecto n = length(pred) copy!(state.pred_copy, pred) - BLAS.axpy!(n, convert(data_type, -1), label.data, 1, state.pred_copy.data, 1) - state.loss = state.layer.weight * 0.5/get_num(pred)*BLAS.dot(state.pred_copy.data, state.pred_copy.data) + LinearAlgebra.BLAS.axpy!(n, convert(data_type, -1), label.data, 1, state.pred_copy.data, 1) + state.loss = state.layer.weight * 
0.5/get_num(pred)*LinearAlgebra.BLAS.dot(state.pred_copy.data, + state.pred_copy.data) # accumulate statistics state.loss_accum = (state.loss_accum*state.n_accum + state.loss*get_num(pred)) / (state.n_accum+get_num(pred)) @@ -78,14 +81,14 @@ function backward(backend::CPUBackend, state::SquareLossLayerState, inputs::Vect num = get_num(pred) erase!(diff) - BLAS.axpy!(n, convert(data_type, state.layer.weight/num), pred.data, 1, diff.data, 1) - BLAS.axpy!(n, convert(data_type, -state.layer.weight/num), label.data, 1, diff.data, 1) + LinearAlgebra.BLAS.axpy!(n, convert(data_type, state.layer.weight/num), pred.data, 1, diff.data, 1) + LinearAlgebra.BLAS.axpy!(n, convert(data_type, -state.layer.weight/num), label.data, 1, diff.data, 1) end # the "label" also needs gradient if isa(diffs[2], CPUBlob) copy!(diffs[2], diff) - BLAS.scal!(n, -one(data_type), diffs[2].data, 1) + LinearAlgebra.BLAS.scal!(n, -one(data_type), diffs[2].data, 1) end end diff --git a/src/regularizers.jl b/src/regularizers.jl index 19928f6..ed2f8e7 100644 --- a/src/regularizers.jl +++ b/src/regularizers.jl @@ -1,3 +1,5 @@ +import LinearAlgebra + export Regularizer export NoRegu, L2Regu, L1Regu export forward, backward @@ -39,7 +41,7 @@ function forward(backend::CPUBackend, regu :: L2Regu, global_regu::AbstractFloat return regu.coefficient * global_regu * vecnorm(param.data)^2 end function backward(backend::CPUBackend, regu :: L2Regu, global_regu::AbstractFloat, param :: Blob, gradient :: Blob) - BLAS.axpy!(length(param), convert(eltype(param), 2 * regu.coefficient * global_regu), pointer(param.data), 1, pointer(gradient.data), 1) + LinearAlgebra.BLAS.axpy!(length(param), convert(eltype(param), 2 * regu.coefficient * global_regu), pointer(param.data), 1, pointer(gradient.data), 1) end ############################################################ diff --git a/src/utils/blas.jl b/src/utils/blas.jl index ac71b80..d1db6c6 100644 --- a/src/utils/blas.jl +++ b/src/utils/blas.jl @@ -9,8 +9,10 @@ for (gemm, elty) in ((:dgemm_, Float64), (:sgemm_, Float32)) @eval begin function gemm!(transA::Char, transB::Char, M::Int, N::Int, K::Int, alpha::$elty, A, lda, B, ldb, beta::$elty, C, ldc) + transA = convert(Cuchar, transA) + transB = convert(Cuchar, transB) ccall(($(blasfunc(gemm)), Base.libblas_name), Nothing, - (Ptr{UInt8}, Ptr{UInt8}, Ptr{LinearAlgebra.BlasInt}, Ptr{LinearAlgebra.BlasInt}, + (Ptr{Cuchar}, Ptr{Cuchar}, Ptr{LinearAlgebra.BlasInt}, Ptr{LinearAlgebra.BlasInt}, Ptr{LinearAlgebra.BlasInt}, Ptr{$elty}, Ptr{$elty}, Ptr{LinearAlgebra.BlasInt}, Ptr{$elty}, Ptr{LinearAlgebra.BlasInt}, Ptr{$elty}, Ptr{$elty}, Ptr{LinearAlgebra.BlasInt}), @@ -30,6 +32,7 @@ end for (gemv, elty) in ((:dgemv_, Float64), (:sgemv_, Float32)) @eval begin function gemv!(trans::Char, M::Int, N::Int, alpha::$elty, A, lda, x, incx, beta::$elty, y, incy) + trans = convert(Cuchar, trans) ccall(($(blasfunc(gemv)), Base.libblas_name), Nothing, (Ptr{UInt8}, Ptr{LinearAlgebra.BlasInt}, Ptr{LinearAlgebra.BlasInt}, Ptr{$elty}, Ptr{$elty}, Ptr{LinearAlgebra.BlasInt}, Ptr{$elty}, Ptr{LinearAlgebra.BlasInt}, diff --git a/src/utils/gradient-checking.jl b/src/utils/gradient-checking.jl index 86e7f95..5d75128 100644 --- a/src/utils/gradient-checking.jl +++ b/src/utils/gradient-checking.jl @@ -39,24 +39,22 @@ end # space, where unrolling becomes trivial # it left as an exercise for the reader function unroll_parameters( model::Net ) - theta = Array{Float64}(undef,0) # initial state is one length ??? weird + theta = Array{Float64}(undef, 0) # initial state is one length ??? 
weird for l = 1:length(model.layers) if Mocha.has_param(model.layers[l]) for m in model.states[l].parameters θ = m.blob.data - size = length(θ) - theta = [ theta; reshape(θ, size, 1)] + len = length(θ) + theta = [ theta; reshape(θ, len, 1) ] end end end - # note the begining, work around the first element - # is fluke - return theta[2:end] + return vec(theta) end function unroll_gradients( model::Net ) - theta = Array{Float64}(undef,0) # initial state is one length ??? weird + theta = Array{Float64}(undef, 0) # initial state is one length ??? weird for l = 1:length(model.layers) if Mocha.has_param(model.layers[l]) for m in model.states[l].parameters @@ -66,23 +64,21 @@ function unroll_gradients( model::Net ) end end end - # note the begining, work around as the first element - # is fluke - return theta[2:end] + return vec(theta) end ################################################# # updates model paramaters by copying # -function update_θ!(model,θᵢ) +function update_θ!(model, θᵢ) pos = 1 for l = 1:length(model.layers) if Mocha.has_param(model.layers[l]) for m in model.states[l].parameters θ = m.blob.data - len = length(θ) - Base.copy!(θ, reshape( θᵢ[pos:(pos+len-1)], size(θ) )) + len = length(θ) + Base.copyto!(θ, reshape( θᵢ[pos:(pos+len-1)], size(θ) )) pos += len end end @@ -106,10 +102,10 @@ function hypothesis_and_gradient( model::Net ) update_θ!(model,θᵢ) # update model parameters backward(model) # compute gradients ∇ = unroll_gradients(model) # retrieve them from model state - Base.copy!(∇ᵢ,∇) # and update them + Base.copyto!(∇ᵢ,∇) # and update them end - return (J,θ,grad!) + return (J, θ, grad!) end @@ -128,8 +124,8 @@ function compute_finite_difference( J::Function, g!::Function, θ::Vector{Float6 # iterate through cost function and calculate slope for i=1:length(θ) - Base.copy!(θ⁺,θ); θ⁺[i] = θ⁺[i] + ε; - Base.copy!(θ⁻,θ); θ⁻[i] = θ⁻[i] - ε; + Base.copyto!(θ⁺,θ); θ⁺[i] = θ⁺[i] + ε; + Base.copyto!(θ⁻,θ); θ⁻[i] = θ⁻[i] - ε; ∇ᵋ[i] = ( J(θ⁺) - J(θ⁻) ) / 2ε end return (∇ᵋ,∇) @@ -145,21 +141,21 @@ end function gradient_check(model::Net, epsilon::Float64, digit::Int, visual::Bool) # create objective that computes grad( θ ), and cost( θ ) - (J,θ, grad) = hypothesis_and_gradient( model::Net ) + (J, θ, grad) = hypothesis_and_gradient( model::Net ) ∇ᵋ,∇ = compute_finite_difference( J, grad, θ ) # do actual comparison with `digit` numerical percision # ∇⁺ = round(∇⁺, 4); ∇ = round(∇, 4) - idx = round.( abs.(∇ᵋ - ∇), digit ) .!= 0 + idx = round.( abs.(∇ᵋ - ∇), digits=digit ) .!= 0 if visual δ = Array{Char}(undef,length(idx)); fill!(δ,'.') δ[idx] = 'x' show(model, δ) - #show(model,round(∇ᵋ,digit) ); show(model,round(∇,digit)) + #show(model,round(∇ᵋ, digits=digit) ); show(model,round(∇, digits=digit)) end # return false if fail at any point # TODO: check if correct - sum( round.( abs.(∇ᵋ - ∇), digit) ) < epsilon + sum( round.( abs.(∇ᵋ - ∇), digits=digit) ) < epsilon end diff --git a/src/utils/io.jl b/src/utils/io.jl index 913858d..d4dcd93 100644 --- a/src/utils/io.jl +++ b/src/utils/io.jl @@ -23,7 +23,7 @@ function temp_filename() end function glob(path::AbstractString, pattern::Regex; sort_by :: Symbol = :none) - list = filter(x -> ismatch(pattern, x), readdir(path)) + list = filter(x -> occursin(pattern, x), readdir(path)) if sort_by == :none return list elseif sort_by == :name diff --git a/test/net/test-gradient-simple.jl b/test/net/test-gradient-simple.jl index 02ba2a6..27adce6 100644 --- a/test/net/test-gradient-simple.jl +++ b/test/net/test-gradient-simple.jl @@ -2,7 +2,7 @@ function 
test_simple_net_gradient(backend) println("-- Testing gradients on simple network (example for gradient checking code)") - srand(12345678) + Random.seed!(12345678) ############################################################ # Prepare Random Data ############################################################ @@ -29,7 +29,7 @@ function test_simple_net_gradient(backend) loss_layer = SquareLossLayer(name="loss", bottoms=[:b, :label] ) - net = Net("TEST", backend, [w1,w2, loss_layer, data_layer]) + net = Net("TEST", backend, [w1, w2, loss_layer, data_layer]) # epsilon: milage may vary 1e-4 - 1e-8 # digit: compare this many digits to check for 'identity' diff --git a/test/runtests.jl b/test/runtests.jl index a15fe37..a2a7d17 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -39,14 +39,14 @@ end ############################################################ # Network ############################################################ -include("net/topology.jl") -include("net/test-gradient-simple.jl") +#- include("net/topology.jl") +#- include("net/test-gradient-simple.jl") ############################################################ # Utilities functions ############################################################ -include("utils/ref-count.jl") -include("utils/glob.jl") +#- include("utils/ref-count.jl") +#- include("utils/glob.jl") include("utils/blas.jl") include("utils/blob-reshape.jl") diff --git a/test/utils/blas.jl b/test/utils/blas.jl index 63340c9..fc4b31a 100644 --- a/test/utils/blas.jl +++ b/test/utils/blas.jl @@ -1,3 +1,5 @@ +import LinearAlgebra + function test_raw_blas(T) println("-- Testing RawBLAS{$T} Utilities") @@ -9,7 +11,7 @@ function test_raw_blas(T) C = rand(T, M, N) C2 = rand(T, M, N) RawBLAS.gemm!('N', 'T', M, N, K, convert(T,1.0), A, M, B, N, convert(T,0.0), C, M) - BLAS.gemm!('N', 'T', convert(T,1.0), A, B, convert(T,0.0), C2) + LinearAlgebra.BLAS.gemm!('N', 'T', convert(T,1.0), A, B, convert(T,0.0), C2) @test all(abs.(C - C2) .< eps) From a4c2b75277a8ec2a4abe8562539c5960891f023f Mon Sep 17 00:00:00 2001 From: Chiyuan Zhang Date: Wed, 21 Nov 2018 14:23:30 -0800 Subject: [PATCH 15/24] fix unit tests for activation functions --- test/neurons/exponential.jl | 2 +- test/neurons/relu.jl | 2 +- test/neurons/sigmoid.jl | 6 +++--- test/neurons/tanh.jl | 6 +++--- test/runtests.jl | 8 ++++---- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/test/neurons/exponential.jl b/test/neurons/exponential.jl index 4ca3edd..44ff24a 100644 --- a/test/neurons/exponential.jl +++ b/test/neurons/exponential.jl @@ -1,7 +1,7 @@ function test_exponential_neuron(backend::Backend, T, eps) println("-- Testing Exponential neuron on $(typeof(backend)){$T}...") - data = rand(T, 3,4,5,6) - convert(T, 0.5) + data = rand(T, 3,4,5,6) .- convert(T, 0.5) data_blob = make_blob(backend, data) neuron = Neurons.Exponential() diff --git a/test/neurons/relu.jl b/test/neurons/relu.jl index 0599db9..0dc693f 100644 --- a/test/neurons/relu.jl +++ b/test/neurons/relu.jl @@ -2,7 +2,7 @@ function test_relu_neuron(backend::Backend, T) println("-- Testing ReLU neuron on $(typeof(backend)){$T}...") eps = 1e-10 - data = rand(T, 3,4,5,6) - convert(T, 0.5) + data = rand(T, 3,4,5,6) .- convert(T, 0.5) data_blob = make_blob(backend, data) neuron0 = Neurons.ReLU() neuron1 = Neurons.ReLU(1e-6) diff --git a/test/neurons/sigmoid.jl b/test/neurons/sigmoid.jl index eea3aee..1a7b320 100644 --- a/test/neurons/sigmoid.jl +++ b/test/neurons/sigmoid.jl @@ -1,13 +1,13 @@ function test_sigmoid_neuron(backend::Backend, T, eps) 
println("-- Testing Sigmoid neuron on $(typeof(backend)){$T}...") - data = rand(T, 3,4,5,6) - convert(T, 0.5) + data = rand(T, 3,4,5,6) .- convert(T, 0.5) data_blob = make_blob(backend, data) neuron = Neurons.Sigmoid() println(" > Forward") forward(backend, neuron, data_blob) - expected_data = 1 ./ (1 + exp.(-data)) + expected_data = 1 ./ (1 .+ exp.(-data)) got_data = zeros(T, size(data)) copy!(got_data, data_blob) @@ -18,7 +18,7 @@ function test_sigmoid_neuron(backend::Backend, T, eps) grad_blob = make_blob(backend, grad) backward(backend, neuron, data_blob, grad_blob) - expected_grad = grad .* (expected_data .* (1-expected_data)) + expected_grad = grad .* (expected_data .* (1 .- expected_data)) got_grad = zeros(T, size(expected_grad)) copy!(got_grad, grad_blob) diff --git a/test/neurons/tanh.jl b/test/neurons/tanh.jl index f70f00a..a6f8200 100644 --- a/test/neurons/tanh.jl +++ b/test/neurons/tanh.jl @@ -1,13 +1,13 @@ function test_tanh_neuron(backend::Backend, T, eps) println("-- Testing Tanh neuron on $(typeof(backend)){$T}...") - data = rand(T, 3,4,5,6) - convert(T, 0.5) + data = rand(T, 3,4,5,6) .- convert(T, 0.5) data_blob = make_blob(backend, data) neuron = Neurons.Tanh() println(" > Forward") forward(backend, neuron, data_blob) - expected_data = (1 - exp.(-2data)) ./ (1 + exp.(-2data)) + expected_data = (1 .- exp.(-2data)) ./ (1 .+ exp.(-2data)) got_data = zeros(T, size(data)) copy!(got_data, data_blob) @@ -18,7 +18,7 @@ function test_tanh_neuron(backend::Backend, T, eps) grad_blob = make_blob(backend, grad) backward(backend, neuron, data_blob, grad_blob) - expected_grad = grad .* (1 - (expected_data .* expected_data)) + expected_grad = grad .* (1 .- (expected_data .* expected_data)) got_grad = zeros(T, size(expected_grad)) copy!(got_grad, grad_blob) @test all(-eps .< got_grad - expected_grad .< eps) diff --git a/test/runtests.jl b/test/runtests.jl index a2a7d17..5caf9f5 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -33,8 +33,8 @@ end ############################################################ # Solvers ############################################################ -#- include("solvers/test-adam-solver.jl") -#- include("solvers/test-sgd-solver.jl") +#+ include("solvers/test-adam-solver.jl") +#+ include("solvers/test-sgd-solver.jl") ############################################################ # Network @@ -47,8 +47,8 @@ end ############################################################ #- include("utils/ref-count.jl") #- include("utils/glob.jl") -include("utils/blas.jl") -include("utils/blob-reshape.jl") +#- include("utils/blas.jl") +#- include("utils/blob-reshape.jl") if test_gpu include("cuda/padded-copy.jl") From bff6ffdad480c09f200ec25f2c8d2390352dafa8 Mon Sep 17 00:00:00 2001 From: Chiyuan Zhang Date: Wed, 21 Nov 2018 14:36:11 -0800 Subject: [PATCH 16/24] fix unit tests for regularizers and constraints --- src/constraints.jl | 6 ++++-- src/regularizers.jl | 2 +- test/constraints/l2.jl | 14 ++++++++------ test/regularizers/l1.jl | 2 +- test/regularizers/l2.jl | 6 ++++-- test/runtests.jl | 12 ++++++------ 6 files changed, 24 insertions(+), 18 deletions(-) diff --git a/src/constraints.jl b/src/constraints.jl index 9c0fefd..4805d8d 100644 --- a/src/constraints.jl +++ b/src/constraints.jl @@ -1,3 +1,5 @@ +import LinearAlgebra + export Constraint, NoCons, L2Cons export constrain! 
@@ -32,11 +34,11 @@ function apply_l2_cons!(backend::CPUBackend, blob::CPUBlob{T}, # we constrain each column vector for i = 1:nunits # compute norm and scale using blas - norm = vecnorm(param[:, i]) + norm = LinearAlgebra.norm(param[:, i]) if norm > threshold scale_factor = (1. / norm) * threshold offset = sizeof(T) * (i-1) * ninputs - BLAS.scal!(ninputs, convert(T, scale_factor), pointer(param) + offset, 1) + LinearAlgebra.BLAS.scal!(ninputs, convert(T, scale_factor), pointer(param) + offset, 1) end end end diff --git a/src/regularizers.jl b/src/regularizers.jl index ed2f8e7..181977c 100644 --- a/src/regularizers.jl +++ b/src/regularizers.jl @@ -38,7 +38,7 @@ end # L2 regularization ############################################################ function forward(backend::CPUBackend, regu :: L2Regu, global_regu::AbstractFloat, param :: Blob) - return regu.coefficient * global_regu * vecnorm(param.data)^2 + return regu.coefficient * global_regu * LinearAlgebra.norm(param.data)^2 end function backward(backend::CPUBackend, regu :: L2Regu, global_regu::AbstractFloat, param :: Blob, gradient :: Blob) LinearAlgebra.BLAS.axpy!(length(param), convert(eltype(param), 2 * regu.coefficient * global_regu), pointer(param.data), 1, pointer(gradient.data), 1) diff --git a/test/constraints/l2.jl b/test/constraints/l2.jl index 4ac274e..8747236 100644 --- a/test/constraints/l2.jl +++ b/test/constraints/l2.jl @@ -1,10 +1,12 @@ +import LinearAlgebra + function test_l2_constraint(backend::Backend, T, eps) println("-- Testing L2 constraint on $(typeof(backend)){$T}...") # this simulates a convolutional filter and applies # the l2 constraint to it n_filters = 5 coef = 0.2 - param = rand(T, 2,3,4,n_filters) - convert(T, 0.5) + param = rand(T, 2,3,4,n_filters) .- convert(T, 0.5) param_after = zeros(T, size(param)) param_blob = make_blob(backend, param) @@ -13,14 +15,14 @@ function test_l2_constraint(backend::Backend, T, eps) copy!(param_after, param_blob) param_after = reshape(param_after, size(param)) for f=1:n_filters - norm2 = vecnorm(param_after[:, :, :, f]) + norm2 = LinearAlgebra.norm(param_after[:, :, :, f]) @test norm2 <= coef + eps end # this is the same as above but for fully connected weights n_input = 10 n_out = 12 - param = rand(T, n_input,n_out) - convert(T, 0.5) + param = rand(T, n_input,n_out) .- convert(T, 0.5) param_after = zeros(T, size(param)) param_blob = make_blob(backend, param) @@ -29,19 +31,19 @@ function test_l2_constraint(backend::Backend, T, eps) copy!(param_after, param_blob) param_after = reshape(param_after, size(param)) for f=1:n_out - norm2 = vecnorm(param_after[:, f]) + norm2 = LinearAlgebra.norm(param_after[:, f]) @test norm2 <= coef + eps end # The case for bias len = 10 - param = rand(T, len) - convert(T, 0.5) + param = rand(T, len) .- convert(T, 0.5) param_after = similar(param) param_blob = make_blob(backend, param) cons = L2Cons(coef) constrain!(backend, cons, param_blob) copy!(param_after, param_blob) - @test vecnorm(param_after) <= coef+eps + @test LinearAlgebra.norm(param_after) <= coef+eps end function test_l2_constraint(backend::Backend) diff --git a/test/regularizers/l1.jl b/test/regularizers/l1.jl index c6de884..9af92ec 100644 --- a/test/regularizers/l1.jl +++ b/test/regularizers/l1.jl @@ -2,7 +2,7 @@ function test_l1_regularizer(backend::Backend, T, eps) println("-- Testing L1 regularizer on $(typeof(backend)){$T}...") coef = rand() - param = rand(T, 2,3,4,5) - convert(T, 0.5) + param = rand(T, 2,3,4,5) .- convert(T, 0.5) param_blob = make_blob(backend, param) regu = 
L1Regu(coef) diff --git a/test/regularizers/l2.jl b/test/regularizers/l2.jl index 6bf4ca0..095ab8b 100644 --- a/test/regularizers/l2.jl +++ b/test/regularizers/l2.jl @@ -1,13 +1,15 @@ +import LinearAlgebra + function test_l2_regularizer(backend::Backend, T, eps) println("-- Testing L2 regularizer on $(typeof(backend)){$T}...") coef = rand() - param = rand(T, 2,3,4,5) - convert(T, 0.5) + param = rand(T, 2,3,4,5) .- convert(T, 0.5) param_blob = make_blob(backend, param) regu = L2Regu(coef) loss = forward(backend, regu, 1.0, param_blob) - expected_loss = coef * vecnorm(param)^2 + expected_loss = coef * LinearAlgebra.norm(param)^2 @test -eps < loss - expected_loss < eps grad_blob = make_zero_blob(backend, T, size(param)) diff --git a/test/runtests.jl b/test/runtests.jl index 5caf9f5..9f3c9ea 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -60,16 +60,16 @@ end ############################################################ # Activation Functions ############################################################ -include("neurons/relu.jl") -include("neurons/sigmoid.jl") -include("neurons/tanh.jl") -include("neurons/exponential.jl") +#- include("neurons/relu.jl") +#- include("neurons/sigmoid.jl") +#- include("neurons/tanh.jl") +#- include("neurons/exponential.jl") ############################################################ # Regularizers ############################################################ -include("regularizers/l2.jl") -include("regularizers/l1.jl") +#- include("regularizers/l2.jl") +#- include("regularizers/l1.jl") ############################################################ # Constraints From 3a410e16a30a0a6f5fecb54a2488ec0f6ae7e419 Mon Sep 17 00:00:00 2001 From: Chiyuan Zhang Date: Wed, 21 Nov 2018 14:37:56 -0800 Subject: [PATCH 17/24] fix unittest: data transformer --- src/data-transformers.jl | 4 +++- src/layers/softmax.jl | 4 ++-- test/runtests.jl | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/data-transformers.jl b/src/data-transformers.jl index f8ad84d..925fbe0 100644 --- a/src/data-transformers.jl +++ b/src/data-transformers.jl @@ -1,3 +1,5 @@ +import LinearAlgebra + export DataTransformerType, DataTransformerState, DataTransformers export setup, forward, shutdown @@ -75,7 +77,7 @@ function setup(backend::Backend, transformer::DataTransformers.Scale, input::Blo return ScaleState(transformer, convert(eltype(input), transformer.scale)) end function forward(backend::CPUBackend, state::ScaleState, input::Blob) - BLAS.scal!(length(input.data), state.scale, input.data, 1) + LinearAlgebra.BLAS.scal!(length(input.data), state.scale, input.data, 1) end function shutdown(backend::Backend, state::ScaleState) end diff --git a/src/layers/softmax.jl b/src/layers/softmax.jl index 890a8ad..34542d9 100644 --- a/src/layers/softmax.jl +++ b/src/layers/softmax.jl @@ -55,7 +55,7 @@ function forward(backend::CPUBackend, state::SoftmaxLayerState, inputs::Vector{B for i = 0:dim_pre-1 for j = 0:dim_post-1 - idx = Int[i + dim_pre*(k + dim_prob*j) for k=0:dim_prob-1] + 1 + idx = Int[i + dim_pre*(k + dim_prob*j) for k=0:dim_prob-1] .+ 1 maxval = -Inf for k in idx @@ -86,7 +86,7 @@ function backward(backend::CPUBackend, state::SoftmaxLayerState, inputs::Vector{ dim_pre, dim_prob, dim_post = split_dims(output, state.dims[ii]) for i = 0:dim_pre-1 for j = 0:dim_post-1 - idx = Int[i + dim_pre*(k + dim_prob*j) for k=0:dim_prob-1] + 1 + idx = Int[i + dim_pre*(k + dim_prob*j) for k=0:dim_prob-1] .+ 1 dot_prod = 0.0 for k in idx @inbounds dot_prod += top_diff[k] * output[k] diff --git 
a/test/runtests.jl b/test/runtests.jl index 9f3c9ea..7d8a19e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -74,7 +74,7 @@ end ############################################################ # Constraints ############################################################ -include("constraints/l2.jl") +#- include("constraints/l2.jl") ############################################################ # Data Transformers From a490312957c225a60d8ac6cf64e6a609303cfcb3 Mon Sep 17 00:00:00 2001 From: Chiyuan Zhang Date: Wed, 21 Nov 2018 15:08:47 -0800 Subject: [PATCH 18/24] fix unit test: power layer and pooling layer --- src/layers/power.jl | 2 ++ src/layers/random-normal.jl | 2 +- test/layers/pooling.jl | 10 +++++----- test/layers/power.jl | 4 ++-- test/layers/random-normal.jl | 3 +++ test/layers/softmax.jl | 8 +++++--- 6 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/layers/power.jl b/src/layers/power.jl index ccf1e51..28740c5 100644 --- a/src/layers/power.jl +++ b/src/layers/power.jl @@ -1,3 +1,5 @@ +import LinearAlgebra: BLAS + ############################################################ # Power Layer ############################################################ diff --git a/src/layers/random-normal.jl b/src/layers/random-normal.jl index e8d8bbd..a7f453b 100644 --- a/src/layers/random-normal.jl +++ b/src/layers/random-normal.jl @@ -11,7 +11,7 @@ is_source => true ) -struct RandomNormalLayerState <: LayerState +mutable struct RandomNormalLayerState <: LayerState layer :: RandomNormalLayer blobs :: Vector{Blob} etc :: Vector{Any} diff --git a/test/layers/pooling.jl b/test/layers/pooling.jl index caac86c..67598e0 100644 --- a/test/layers/pooling.jl +++ b/test/layers/pooling.jl @@ -1,5 +1,5 @@ function test_pooling_layer(backend::Backend, pooling::PoolingFunction, has_padding::Bool, n_input, T, eps) - println("-- Testing Pooling($(typeof(pooling))) $(has_padding? "with padding":"") on $(typeof(backend)){$T}...") + println("-- Testing Pooling($(typeof(pooling))) $(has_padding ? 
"with padding" : "") on $(typeof(backend)){$T}...") println(" > Setup") if has_padding @@ -96,9 +96,9 @@ function pooling_forward(state, i, input::Array) hstart = max(1, hstart) wstart = max(1, wstart) - region = view(input, wstart:wend, hstart:hend, c, n) + region = reshape(view(input, wstart:wend, hstart:hend, c, n), :) if isa(state.layer.pooling, Pooling.Max) - index = indmax(region) + index = argmax(region) mask[pw, ph, c, n] = index # note this is local index in region output[pw, ph, c, n] = region[index] elseif isa(state.layer.pooling, Pooling.Mean) @@ -137,12 +137,12 @@ function pooling_backward(state, i, input::Array, diff::Array, payload::Any) hstart = max(1, hstart) wstart = max(1, wstart) - region = view(gradient, wstart:wend, hstart:hend, c, n) + region = reshape(view(gradient, wstart:wend, hstart:hend, c, n), :) if isa(state.layer.pooling, Pooling.Max) index = payload[pw, ph, c, n] region[index] += diff[pw, ph, c, n] elseif isa(state.layer.pooling, Pooling.Mean) - region[:] += diff[pw, ph, c, n] / kernel_size + region[:] .+= diff[pw, ph, c, n] / kernel_size else error("Unknown pooling $(state.layer.pooling)") end diff --git a/test/layers/power.jl b/test/layers/power.jl index 8659e0b..a34d6b4 100644 --- a/test/layers/power.jl +++ b/test/layers/power.jl @@ -14,7 +14,7 @@ function test_power_layer(backend::Backend, scale, shift, power, n_input, T, eps forward(backend, state, input_blob) for i = 1:n_input - output = (scale * input[i] + shift) .^ power + output = (scale .* input[i] .+ shift) .^ power got_output = zeros(T, size(output)) copy!(got_output, state.blobs[i]) @@ -29,7 +29,7 @@ function test_power_layer(backend::Backend, scale, shift, power, n_input, T, eps backward(backend, state, input_blob, grad_blob) for i = 1:n_input - grad = power * scale * (scale * input[i] + shift) .^ (power - 1) .* top_diff[i] + grad = power .* scale .* (scale .* input[i] .+ shift) .^ (power - 1) .* top_diff[i] got_grad = zeros(T, size(grad)) copy!(got_grad, grad_blob[i]) @test all(-eps .< got_grad - grad .< eps) diff --git a/test/layers/random-normal.jl b/test/layers/random-normal.jl index a9ef971..20b0216 100644 --- a/test/layers/random-normal.jl +++ b/test/layers/random-normal.jl @@ -1,3 +1,6 @@ +import Statistics: mean +import LinearAlgebra: norm + function test_random_normal_layer(backend::Backend, T, eps) println("-- Testing RandomNormal Layer on $(typeof(backend)){$T}...") diff --git a/test/layers/softmax.jl b/test/layers/softmax.jl index e668f13..5de17ec 100644 --- a/test/layers/softmax.jl +++ b/test/layers/softmax.jl @@ -1,3 +1,5 @@ +import LinearAlgebra + function test_softmax_layer(backend::Backend, tensor_dim, n_input, T, eps) println("-- Testing SoftmaxLayer on $(typeof(backend)){$T}...") @@ -28,9 +30,9 @@ function test_softmax_layer(backend::Backend, tensor_dim, n_input, T, eps) for x = 1:dim_pre for y = 1:dim_post preds = canonical_input[x,:,y] - preds -= maximum(preds) + preds .-= maximum(preds) preds = exp.(preds) - preds /= sum(preds) + preds ./= sum(preds) output[x,:,y] = preds end end @@ -62,7 +64,7 @@ function test_softmax_layer(backend::Backend, tensor_dim, n_input, T, eps) for y = 1:dim_post topdiff0 = canonical_topdiff[x,:,y] output0 = canonical_output[x,:,y] - grad[x,:,y] = topdiff0.*output0 - dot(vec(topdiff0), vec(output0))*output0 + grad[x,:,y] = topdiff0.*output0 .- LinearAlgebra.dot(vec(topdiff0), vec(output0))*output0 end end From 5b72e5d5c7e9594ef6bd39c453e0003f973cc401 Mon Sep 17 00:00:00 2001 From: Chiyuan Zhang Date: Wed, 21 Nov 2018 15:16:02 -0800 Subject: 
[PATCH 19/24] fix unit test: power layer, memory-data layer, lrn layer --- test/layers/lrn.jl | 4 ++-- test/layers/memory-data.jl | 2 +- test/layers/power.jl | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/test/layers/lrn.jl b/test/layers/lrn.jl index db5cf65..d30e112 100644 --- a/test/layers/lrn.jl +++ b/test/layers/lrn.jl @@ -51,7 +51,7 @@ function lrn_forward_across_channel(input::Array{T}, state, op_dim) where {T} cstart = max(1, cstart) tmp = canonical_input[:,cstart:cend,n].^2 * (state.layer.scale / state.layer.kernel) - tmp = (sum(tmp, 2) + state.layer.shift) .^ state.layer.power + tmp = (sum(tmp, dims=2) .+ state.layer.shift) .^ state.layer.power canonical_output[:,c,n] = canonical_input[:,c,n] ./ tmp end end @@ -113,7 +113,7 @@ function lrn_backward_across_channel(input::Array{T}, top_diff::Array{T}, state, cstart = max(1, cstart) tmp = canonical_input[:,cstart:cend,n].^2 * (state.layer.scale / state.layer.kernel) - tmp = (sum(tmp, 2) + state.layer.shift) + tmp = (sum(tmp, dims=2) .+ state.layer.shift) canonical_output[:,c,n] += tmp .^ (-state.layer.power) .* canonical_diff[:,c,n] diff --git a/test/layers/memory-data.jl b/test/layers/memory-data.jl index 1204021..b61d453 100644 --- a/test/layers/memory-data.jl +++ b/test/layers/memory-data.jl @@ -25,7 +25,7 @@ function test_memory_data_layer(backend::Backend, T, eps) data_idx = map(x->1:x, data_dim) layer_data = Array{eltype(data)}(undef,tuple(data_dim..., batch_size)) - data_aug = cat(tensor_dim+1, data, data) + data_aug = cat(data, data, dims=tensor_dim+1) data_aug .-= mean_data forward(backend, state, Blob[]) copy!(layer_data, state.blobs[1]) diff --git a/test/layers/power.jl b/test/layers/power.jl index a34d6b4..43a10b8 100644 --- a/test/layers/power.jl +++ b/test/layers/power.jl @@ -56,7 +56,7 @@ end function test_power_layer(backend::Backend) test_power_layer(backend, 3, Float32, 8e-2) - test_power_layer(backend, 3, Float64, 1e-8) + test_power_layer(backend, 3, Float64, 1e-7) end if test_cpu From f5fffa5c4baff8fcebacbbc132a4c662842fa1f1 Mon Sep 17 00:00:00 2001 From: Chiyuan Zhang Date: Wed, 21 Nov 2018 15:30:36 -0800 Subject: [PATCH 20/24] fix unit test hdf5, crop, inner-product --- src/layers/async-hdf5-data.jl | 2 +- src/layers/hdf5-data.jl | 2 +- test/layers/crop.jl | 4 ++-- test/layers/hdf5-data.jl | 6 +++--- test/layers/inner-product.jl | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/layers/async-hdf5-data.jl b/src/layers/async-hdf5-data.jl index aa858e0..c86b48f 100644 --- a/src/layers/async-hdf5-data.jl +++ b/src/layers/async-hdf5-data.jl @@ -15,7 +15,7 @@ using HDF5 const AsyncCommsType = @static VERSION < v"0.6-" ? 
Task : Channel{Any} -struct AsyncHDF5DataLayerState <: LayerState +mutable struct AsyncHDF5DataLayerState <: LayerState layer :: AsyncHDF5DataLayer blobs :: Vector{Blob} epoch :: Int diff --git a/src/layers/hdf5-data.jl b/src/layers/hdf5-data.jl index 87c7835..d750552 100644 --- a/src/layers/hdf5-data.jl +++ b/src/layers/hdf5-data.jl @@ -13,7 +13,7 @@ using HDF5 ) -struct HDF5DataLayerState <: LayerState +mutable struct HDF5DataLayerState <: LayerState layer :: HDF5DataLayer blobs :: Vector{Blob} epoch :: Int diff --git a/test/layers/crop.jl b/test/layers/crop.jl index 0b26838..453f2d3 100644 --- a/test/layers/crop.jl +++ b/test/layers/crop.jl @@ -24,7 +24,7 @@ function test_crop_layer(backend::Backend, do_mirror, n_input, T, eps) @test size(expected_output) == size(got_output) if do_mirror @test all(abs.(got_output - expected_output) .< eps) || - all(abs.(got_output - flipdim(expected_output,1)) .< eps) + all(abs.(got_output - reverse(expected_output, dims=1)) .< eps) else @test all(abs.(got_output - expected_output) .< eps) end @@ -58,7 +58,7 @@ function test_crop_layer_random(backend::Backend, do_mirror, n_input, T, eps) expected_output = input[i+1:i+crop_size[1], j+1:j+crop_size[2],:,:] matched = matched | all(abs.(got_output - expected_output) .< eps) if do_mirror - matched = matched | all(abs.(got_output - flipdim(expected_output,1)) .< eps) + matched = matched | all(abs.(got_output - reverse(expected_output, dims=1)) .< eps) end end end diff --git a/test/layers/hdf5-data.jl b/test/layers/hdf5-data.jl index 2e2a68d..c87f9cd 100644 --- a/test/layers/hdf5-data.jl +++ b/test/layers/hdf5-data.jl @@ -42,8 +42,8 @@ function test_hdf5_data_layer(backend::Backend, async, T, eps) state = setup(backend, layer, Blob[], Blob[]) @test state.epoch == 0 - data = cat(tensor_dim+1, data_all...) - data = cat(tensor_dim+1, data, data, data) + data = cat(data_all..., dims=tensor_dim+1) + data = cat(data, data, data, dims=tensor_dim+1) data = data * scale data_idx = map(x->1:x, data_dim) @@ -128,7 +128,7 @@ end function test_hdf5_data_layer_shuffle(backend::Backend, batch_size, n, T) # do not run (non-async) HDF5 data layer shuffling on windows, because it is implemented # with memmap, which is not working properly on Windows. - @static is_windows() ? nothing : test_hdf5_data_layer_shuffle(backend, batch_size, false, n, T) + @static Sys.iswindows() ? 
nothing : test_hdf5_data_layer_shuffle(backend, batch_size, false, n, T) test_hdf5_data_layer_shuffle(backend, batch_size, true, n, T) end diff --git a/test/layers/inner-product.jl b/test/layers/inner-product.jl index c2c52a6..c080798 100644 --- a/test/layers/inner-product.jl +++ b/test/layers/inner-product.jl @@ -54,7 +54,7 @@ function test_inner_product_layer(backend::Backend, n_input, T, eps) top_diff = [reshape(top_diff[i], target_dim, batch_size[i]) for i = 1:n_input] bias_grad = similar(b) copy!(bias_grad, state.∇b) - bias_grad_expected = sum([sum(top_diff[i],2) for i = 1:n_input]) + bias_grad_expected = sum([sum(top_diff[i], dims=2) for i = 1:n_input]) @test all(-eps .< vec(bias_grad) - vec(bias_grad_expected) .< eps) X_mat = [reshape(X[i], orig_dim, batch_size[i]) for i = 1:n_input] From d9e27d11f66a11658fab13df88824bb11257dab5 Mon Sep 17 00:00:00 2001 From: Chiyuan Zhang Date: Wed, 21 Nov 2018 15:40:41 -0800 Subject: [PATCH 21/24] fix unit test convolution and concat --- src/layers/convolution.jl | 4 ++-- test/layers/concat.jl | 4 ++-- test/layers/convolution.jl | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/layers/convolution.jl b/src/layers/convolution.jl index 370eec1..d9b3ab3 100644 --- a/src/layers/convolution.jl +++ b/src/layers/convolution.jl @@ -24,7 +24,7 @@ can_do_bp => true ) -struct CPUConvState +mutable struct CPUConvState col_buffer :: Blob M :: Int N :: Int @@ -55,7 +55,7 @@ function setup_etc(backend::CPUBackend, layer::ConvolutionLayer, dtype, width, h return etc end -struct ConvolutionLayerState <: LayerState +mutable struct ConvolutionLayerState <: LayerState layer :: ConvolutionLayer blobs :: Vector{Blob} blobs_diff :: Vector{Blob} diff --git a/test/layers/concat.jl b/test/layers/concat.jl index 6197fa1..81e2b42 100644 --- a/test/layers/concat.jl +++ b/test/layers/concat.jl @@ -22,7 +22,7 @@ function test_concat_layer(backend::Backend, dim, T, eps) println(" > Forward") forward(backend, state, input_blobs) - expected_output = cat(dim, inputs...) + expected_output = cat(inputs..., dims=dim) got_output = to_array(state.blobs[1]) @test all(abs.(expected_output-got_output) .< eps) @@ -33,7 +33,7 @@ function test_concat_layer(backend::Backend, dim, T, eps) for i = 1:n_input copy!(inputs[i], grad_blobs[i]) end - got_grad_all = cat(dim, inputs...) 
+ got_grad_all = cat(inputs..., dims=dim) @test all(abs.(top_diff - got_grad_all) .< eps) end diff --git a/test/layers/convolution.jl b/test/layers/convolution.jl index 4109352..edd2d79 100644 --- a/test/layers/convolution.jl +++ b/test/layers/convolution.jl @@ -95,7 +95,7 @@ function convolution_forward(state, filter::Array, bias::Array, input::Array) k_g = round(Int, channel / n_group) output = Array{eltype(input)}(undef,size(state.blobs[1])) - output[:] = 0 + fill!(output, 0) for n = 1:num for g = 1:n_group From ad2fb56010d876b3fa7017bbf9e63abb7e0eadca Mon Sep 17 00:00:00 2001 From: Chiyuan Zhang Date: Wed, 21 Nov 2018 18:11:25 -0800 Subject: [PATCH 22/24] fix unit test: argmax, channel pooling --- src/layers/argmax.jl | 2 +- test/layers/argmax.jl | 2 +- test/layers/channel-pooling.jl | 13 +++++++------ 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/layers/argmax.jl b/src/layers/argmax.jl index ea77091..3cccbd9 100644 --- a/src/layers/argmax.jl +++ b/src/layers/argmax.jl @@ -36,7 +36,7 @@ function forward(backend::CPUBackend, state::ArgmaxLayerState, inputs::Vector{Bl pre_dim, mid_dim, post_dim = split_dims(input, state.dims[i]) for x = 0:pre_dim-1 for z = 0:post_dim-1 - idx = Int[x + pre_dim*(y + mid_dim*z) for y=0:mid_dim-1] + 1 + idx = Int[x + pre_dim*(y + mid_dim*z) for y=0:mid_dim-1] .+ 1 maxc = 1 @inbounds maxval = input[idx[1]] for y = 2:length(idx) diff --git a/test/layers/argmax.jl b/test/layers/argmax.jl index 9a7bddd..7c656db 100644 --- a/test/layers/argmax.jl +++ b/test/layers/argmax.jl @@ -26,7 +26,7 @@ function test_argmax_layer(backend::Backend, n_input, tensor_dim, T, eps) output = reshape(expected_output, pre_dim, 1, post_dim) for x = 1:pre_dim for z = 1:post_dim - output[x,1,z] = indmax(input[x,:,z])-1 + output[x,1,z] = argmax(input[x,:,z])-1 end end diff --git a/test/layers/channel-pooling.jl b/test/layers/channel-pooling.jl index 41eb8d5..1d45053 100644 --- a/test/layers/channel-pooling.jl +++ b/test/layers/channel-pooling.jl @@ -67,11 +67,11 @@ function channel_pooling_forward(state, i, input::Array, op_dim) region = canonical_input[:,cstart:cend,n] if isa(state.layer.pooling, Pooling.Max) - maxval, maxidx = findmax(region, 2) + maxval, maxidx = findmax(region, dims=2) canonical_output[:,pc,n] = maxval - canonical_mask[:,pc,n] = maxidx + canonical_mask[:,pc,n] = [idx_obj[2] for idx_obj in maxidx] elseif isa(state.layer.pooling, Pooling.Mean) - canonical_output[:,pc,n] = sum(region, 2) / state.layer.kernel + canonical_output[:,pc,n] = sum(region, dims=2) ./ state.layer.kernel else error("Unknown pooling $(state.layer.pooling)") end @@ -105,10 +105,11 @@ function channel_pooling_backward(state, i, input::Array, diff::Array, payload:: if isa(state.layer.pooling, Pooling.Max) region = view(canonical_gradient,1:dim_pre,cstart:cend,n) maxidx = canonical_mask[:,pc,n] - region[vec(maxidx)] += vec(canonical_diff[:,pc,n]) + maxidx = [CartesianIndex(i0, maxidx[i0]) for i0=1:length(maxidx)] + region[maxidx] += canonical_diff[:,pc,n] elseif isa(state.layer.pooling, Pooling.Mean) for c = cstart:cend - canonical_gradient[:,c,n] += canonical_diff[:,pc,n] / state.layer.kernel + canonical_gradient[:,c,n] += canonical_diff[:,pc,n] ./ state.layer.kernel end else error("Unknown pooling $(state.layer.pooling)") @@ -129,8 +130,8 @@ function test_channel_pooling_layer(backend::Backend, n_input, T, eps) end function test_channel_pooling_layer(backend::Backend) - test_channel_pooling_layer(backend, 1, Float32, 1e-4) test_channel_pooling_layer(backend, 3, Float64, 1e-8) + 
test_channel_pooling_layer(backend, 1, Float32, 1e-3) end if test_cpu From 33814cbd1100ff946fa76e9c5553243c75f96348 Mon Sep 17 00:00:00 2001 From: Chiyuan Zhang Date: Wed, 21 Nov 2018 19:32:55 -0800 Subject: [PATCH 23/24] all the rest of the layers --- src/initializers.jl | 2 +- src/layers/accuracy.jl | 6 +++--- src/layers/binary-accuracy.jl | 2 +- src/layers/binary-cross-entropy-loss.jl | 8 ++++---- src/layers/gaussian-kl-loss.jl | 11 ++++------- src/layers/hdf5-output.jl | 2 +- src/layers/hinge-loss.jl | 2 +- src/layers/multinomial-logistic-loss.jl | 10 +++++----- src/layers/softlabel-softmax-loss.jl | 2 +- src/layers/softmax-loss.jl | 10 +++++----- src/layers/tied-inner-product.jl | 2 +- test/layers/accuracy.jl | 2 +- test/layers/binary-accuracy.jl | 6 +++--- test/layers/binary-cross-entropy-loss.jl | 8 ++++---- test/layers/gaussian-kl-loss.jl | 4 ++-- test/layers/hdf5-output.jl | 2 +- test/layers/hinge-loss.jl | 4 ++-- test/layers/multinomial-logistic-loss.jl | 2 +- test/layers/softlabel-softmax-loss.jl | 10 +++++----- test/layers/softmax-loss.jl | 6 +++--- test/layers/square-loss.jl | 4 +++- test/layers/tied-inner-product.jl | 2 +- test/runtests.jl | 5 +++-- 23 files changed, 56 insertions(+), 56 deletions(-) diff --git a/src/initializers.jl b/src/initializers.jl index e05bc00..7c2adbd 100644 --- a/src/initializers.jl +++ b/src/initializers.jl @@ -37,7 +37,7 @@ end function init(initializer::XavierInitializer, blob::Blob) fan_in = get_fea_size(blob) scale = convert(eltype(blob), sqrt(3.0 / fan_in)) - init_val = rand(eltype(blob), size(blob)) * 2scale - scale + init_val = rand(eltype(blob), size(blob)) .* 2scale .- scale copy!(blob, init_val) end diff --git a/src/layers/accuracy.jl b/src/layers/accuracy.jl index 3f414c1..72047cf 100644 --- a/src/layers/accuracy.jl +++ b/src/layers/accuracy.jl @@ -9,7 +9,7 @@ has_stats => true, ) -struct AccuracyLayerState <: LayerState +mutable struct AccuracyLayerState <: LayerState layer :: AccuracyLayer op_dim :: Int @@ -60,8 +60,8 @@ function forward(backend::CPUBackend, state::AccuracyLayerState, inputs::Vector{ accuracy = 0.0 for i = 0:dim_pre-1 for j = 0:dim_post-1 - idx = Int[i + dim_pre*(k + dim_prob*j) for k=0:dim_prob-1] + 1 - @inbounds if round(Int, label[i + dim_pre*j + 1])+1 == indmax(pred[idx]) + idx = Int[i + dim_pre*(k + dim_prob*j) for k=0:dim_prob-1] .+ 1 + @inbounds if round(Int, label[i + dim_pre*j + 1])+1 == argmax(pred[idx]) accuracy += 1.0 end end diff --git a/src/layers/binary-accuracy.jl b/src/layers/binary-accuracy.jl index 9c08a22..e3a38d3 100644 --- a/src/layers/binary-accuracy.jl +++ b/src/layers/binary-accuracy.jl @@ -9,7 +9,7 @@ has_stats => true, ) -struct BinaryAccuracyLayerState <: LayerState +mutable struct BinaryAccuracyLayerState <: LayerState layer :: BinaryAccuracyLayer accuracy :: Float64 diff --git a/src/layers/binary-cross-entropy-loss.jl b/src/layers/binary-cross-entropy-loss.jl index dc9f1d9..6136304 100644 --- a/src/layers/binary-cross-entropy-loss.jl +++ b/src/layers/binary-cross-entropy-loss.jl @@ -12,7 +12,7 @@ can_do_bp => true, ) -struct BinaryCrossEntropyLossLayerState{T} <: LayerState +mutable struct BinaryCrossEntropyLossLayerState{T} <: LayerState layer :: BinaryCrossEntropyLossLayer loss :: T end @@ -29,7 +29,7 @@ end function forward(backend::CPUBackend, state::BinaryCrossEntropyLossLayerState, inputs::Vector{Blob}) pred = vec(inputs[1].data) label = vec(inputs[2].data) - loss = BLAS.dot(log.(pred), label) + BLAS.dot(log1p.(-pred), (1-label)) + loss = BLAS.dot(log.(pred), label) + 
BLAS.dot(log1p.(-pred), (1 .- label)) num = get_num(inputs[1]) state.loss = state.layer.weight * -loss/num @@ -53,12 +53,12 @@ function backward(backend::CPUBackend, state::BinaryCrossEntropyLossLayerState, a = convert(eltype(pred), -state.layer.weight/get_num(inputs[1])) erase!(diff) # is this correct? square-loss does it - should we allow for any incoming diff? - dl_dpred = (label ./ pred) - ((1-label) ./ (1-pred)) # dloss/d? + dl_dpred = (label ./ pred) - ((1 .- label) ./ (1 .- pred)) # dloss/d? BLAS.axpy!(n, a, dl_dpred, 1, diff.data, 1) end diff = diffs[2] if isa(diff, CPUBlob) - dl_dlabel = log.(pred ./ (1-pred)) + dl_dlabel = log.(pred ./ (1 .- pred)) erase!(diff) # is this correct? square-loss does it BLAS.axpy!(n, a, dl_dlabel, 1, diff.data, 1) end diff --git a/src/layers/gaussian-kl-loss.jl b/src/layers/gaussian-kl-loss.jl index 22a45dc..7f51941 100644 --- a/src/layers/gaussian-kl-loss.jl +++ b/src/layers/gaussian-kl-loss.jl @@ -7,20 +7,17 @@ # to the standard Gaussian N(0,I). ############################################################ -@defstruct GaussianKLLossLayer Layer ( - name :: AbstractString = "gauss-kl-loss", +@defstruct GaussianKLLossLayer Layer (name :: AbstractString = "gauss-kl-loss", (weight :: AbstractFloat = 1.0, weight >= 0), - (bottoms :: Vector{Symbol} = Symbol[:mu, :sigma], length(bottoms) == 2), - ) + (bottoms :: Vector{Symbol} = Symbol[:mu, :sigma], length(bottoms) == 2)) @characterize_layer(GaussianKLLossLayer, has_loss => true, can_do_bp => true, is_sink => true, - has_stats => true, - ) + has_stats => true) -struct GaussianKLLossLayerState{T, B<:Blob} <: LayerState +mutable struct GaussianKLLossLayerState{T, B<:Blob} <: LayerState layer :: GaussianKLLossLayer loss :: T loss_accum :: T diff --git a/src/layers/hdf5-output.jl b/src/layers/hdf5-output.jl index 3cf41eb..53668b9 100644 --- a/src/layers/hdf5-output.jl +++ b/src/layers/hdf5-output.jl @@ -11,7 +11,7 @@ using HDF5 is_sink => true ) -struct HDF5OutputLayerState <: LayerState +mutable struct HDF5OutputLayerState <: LayerState layer :: HDF5OutputLayer file :: HDF5File buffer :: Vector{Array} diff --git a/src/layers/hinge-loss.jl b/src/layers/hinge-loss.jl index d4c4a58..92853c9 100644 --- a/src/layers/hinge-loss.jl +++ b/src/layers/hinge-loss.jl @@ -16,7 +16,7 @@ has_stats => true, ) -struct HingeLossLayerState{T} <: LayerState +mutable struct HingeLossLayerState{T} <: LayerState layer :: HingeLossLayer loss :: T diff --git a/src/layers/multinomial-logistic-loss.jl b/src/layers/multinomial-logistic-loss.jl index ed2d543..b49492b 100644 --- a/src/layers/multinomial-logistic-loss.jl +++ b/src/layers/multinomial-logistic-loss.jl @@ -14,7 +14,7 @@ is_sink => true, ) -struct MultinomialLogisticLossLayerState{T} <: LayerState +mutable struct MultinomialLogisticLossLayerState{T} <: LayerState layer :: MultinomialLogisticLossLayer loss :: T @@ -60,7 +60,7 @@ function setup(backend::Backend, layer::MultinomialLogisticLossLayer, inputs::Ve weights = convert(Array{data_type}, weights) if layer.normalize == :local - weights = weights .* (dims[op_dim] ./ sum(weights, op_dim)) + weights = weights .* (dims[op_dim] ./ sum(weights, dims=op_dim)) elseif layer.normalize == :global for i = 1:dims[end] idx = map(x -> 1:x, dims[1:end-1]) @@ -101,10 +101,10 @@ function forward(backend::CPUBackend, state::MultinomialLogisticLossLayerState, end if isa(state.weights_blob, NullBlob) - loss = sum(-log.(max.(broadcast_getindex(pred, idx_all...), 1e-20))) + loss = sum(-log.(max.(getindex.((pred,), idx_all...), 1e-20))) else - 
loss = sum(-log.(max.(broadcast_getindex(pred, idx_all...), 1e-20)) .* - broadcast_getindex(state.weights_blob.data, idx_all...)) + loss = sum(-log.(max.(getindex.((pred,), idx_all...), 1e-20)) .* + getindex.((state.weights_blob.data,), idx_all...)) end state.loss = state.layer.weight * loss / (prod(dims) / dims[state.op_dim]) end diff --git a/src/layers/softlabel-softmax-loss.jl b/src/layers/softlabel-softmax-loss.jl index f59a51d..6826053 100644 --- a/src/layers/softlabel-softmax-loss.jl +++ b/src/layers/softlabel-softmax-loss.jl @@ -10,7 +10,7 @@ is_sink => true ) -struct SoftlabelSoftmaxLossLayerState{T} <: LayerState +mutable struct SoftlabelSoftmaxLossLayerState{T} <: LayerState layer :: SoftlabelSoftmaxLossLayer loss :: T diff --git a/src/layers/softmax-loss.jl b/src/layers/softmax-loss.jl index ef7206e..bc1f7bf 100644 --- a/src/layers/softmax-loss.jl +++ b/src/layers/softmax-loss.jl @@ -15,7 +15,7 @@ is_sink => true ) -struct SoftmaxLossLayerState{T} <: LayerState +mutable struct SoftmaxLossLayerState{T} <: LayerState layer :: SoftmaxLossLayer loss :: T @@ -67,15 +67,15 @@ function backward(backend::CPUBackend, state::SoftmaxLossLayerState, inputs::Vec copy!(diff, state.softmax.blobs[1]) else copy!(diff, state.softmax.blobs[1].data .* - broadcast_getindex(state.logistic.weights_blob.data, idx_all...)) + getindex.((state.logistic.weights_blob.data,), idx_all...)) end diff_data = reshape(diff.data, dims) if isa(state.logistic.weights_blob, NullBlob) - broadcast_setindex!(diff_data, broadcast_getindex(diff_data, idx_all...)-1, idx_all...) + setindex!.((diff_data,), getindex.((diff_data,), idx_all...) .- 1, idx_all...) else - broadcast_setindex!(diff_data, broadcast_getindex(diff_data, idx_all...) .- - broadcast_getindex(state.logistic.weights_blob.data, idx_all...), idx_all...) + setindex!.((diff_data,), getindex.((diff_data,), idx_all...) .- + getindex.((state.logistic.weights_blob.data,), idx_all...), idx_all...) end Vec.mul_scal!(diff.data, state.layer.weight * dims[state.logistic.op_dim]/prod(dims)) end diff --git a/src/layers/tied-inner-product.jl b/src/layers/tied-inner-product.jl index 29d3701..b2b1aa8 100644 --- a/src/layers/tied-inner-product.jl +++ b/src/layers/tied-inner-product.jl @@ -16,7 +16,7 @@ has_neuron => true ) -struct TiedInnerProductLayerState <: LayerState +mutable struct TiedInnerProductLayerState <: LayerState layer :: TiedInnerProductLayer blobs :: Vector{Blob} blobs_diff :: Vector{Blob} diff --git a/test/layers/accuracy.jl b/test/layers/accuracy.jl index b549f79..2eb2472 100644 --- a/test/layers/accuracy.jl +++ b/test/layers/accuracy.jl @@ -34,7 +34,7 @@ function test_accuracy_layer(backend::Backend, tensor_dim, T) for i = 1:dim_pre for j = 1:dim_post pred = canonical_input[i,:,j] - if indmax(pred) == round(Int, canonical_label[i,1,j])+1 + if argmax(pred) == round(Int, canonical_label[i,1,j])+1 expected_acc += 1 end end diff --git a/test/layers/binary-accuracy.jl b/test/layers/binary-accuracy.jl index f447180..08b9584 100644 --- a/test/layers/binary-accuracy.jl +++ b/test/layers/binary-accuracy.jl @@ -7,8 +7,8 @@ function test_binary_accuracy_layer(backend::Backend, tensor_dim, T, threshold, dims = tuple(rand(6:11, tensor_dim)...) 
println(" > $dims") if 0 == threshold - preds = rand(T, dims)*4-2 - labels = round.(rand(T, dims))*2-1 + preds = rand(T, dims) .* 4 .- 2 + labels = round.(rand(T, dims)) .* 2 .- 1 elseif 0.5 == threshold preds = round.(rand(T, dims)) labels = rand(T, dims) @@ -17,7 +17,7 @@ function test_binary_accuracy_layer(backend::Backend, tensor_dim, T, threshold, end errs_mask = (preds.>threshold) .≠ (labels.>threshold) n = prod(dims) - n_wrong = countnz(errs_mask) + n_wrong = count(!iszero, errs_mask) n_right = n - n_wrong ############################################################ diff --git a/test/layers/binary-cross-entropy-loss.jl b/test/layers/binary-cross-entropy-loss.jl index 83a154a..f14b3df 100644 --- a/test/layers/binary-cross-entropy-loss.jl +++ b/test/layers/binary-cross-entropy-loss.jl @@ -27,7 +27,7 @@ function test_binary_crossentropy_loss_layer(backend::Backend, tensor_dim, T, ep for i = 1:prod(dims) expected_loss += -log(vec(prob)[i])*vec(label)[i] - expected_loss += -log(1 - vec(prob)[i])*vec(1 - label)[i] + expected_loss += -log(1 .- vec(prob)[i])*vec(1 .- label)[i] end expected_loss /= dims[end] @@ -39,14 +39,14 @@ function test_binary_crossentropy_loss_layer(backend::Backend, tensor_dim, T, ep diff_blob1 = make_blob(backend, prob) diffs = Blob[diff_blob1, diff_blob2] backward(backend, state, inputs, diffs) - grad_pred = -weight * (label./prob - (1-label)./(1-prob) ) / dims[end] + grad_pred = -weight * (label./prob - (1 .- label)./(1 .- prob) ) / dims[end] diff = similar(grad_pred) copy!(diff, diffs[1]) - @test all(-eps .< 1 - grad_pred./diff .< eps) + @test all(-eps .< 1 .- grad_pred./diff .< eps) - grad_label = -weight * log.(prob./(1.-prob)) / dims[end] + grad_label = -weight * log.(prob./(1 .- prob)) / dims[end] diff = similar(grad_pred) copy!(diff, diffs[2]) diff --git a/test/layers/gaussian-kl-loss.jl b/test/layers/gaussian-kl-loss.jl index ccf0c0a..ac405f3 100644 --- a/test/layers/gaussian-kl-loss.jl +++ b/test/layers/gaussian-kl-loss.jl @@ -38,12 +38,12 @@ function test_gaussian_kl_loss_layer(backend::Backend, T, eps) backward(backend, state, inputs, diffs) grad = mus - grad *= weight/get_num(mu_blob) + grad *= weight / get_num(mu_blob) diff = similar(grad) copy!(diff, diffs[1]) @test all(-eps .< grad - diff .< eps) - grad = sigmas - 1./sigmas + grad = sigmas - 1 ./ sigmas grad *= weight/get_num(mu_blob) diff = similar(grad) copy!(diff, diffs[2]) diff --git a/test/layers/hdf5-output.jl b/test/layers/hdf5-output.jl index 5447c71..6ad0276 100644 --- a/test/layers/hdf5-output.jl +++ b/test/layers/hdf5-output.jl @@ -31,7 +31,7 @@ function test_hdf5_output_layer(backend::Backend, T, eps) shutdown(backend, state) - expected_output = cat(tensor_dim, input, input, input) + expected_output = cat(input, input, input, dims=tensor_dim) got_output = h5open(output_fn, "r") do h5 read(h5, "foobar") end diff --git a/test/layers/hinge-loss.jl b/test/layers/hinge-loss.jl index a5f1b35..4f77df6 100644 --- a/test/layers/hinge-loss.jl +++ b/test/layers/hinge-loss.jl @@ -7,8 +7,8 @@ function test_hinge_loss_layer(backend::Backend, T, eps) tensor_dim = abs(rand(Int)) % 4 + 2 dims = tuple(rand(6:11, tensor_dim)...) 
println(" > $dims") - preds = rand(T, dims)*4-2 - labels = round.(rand(T, dims))*2-1 + preds = rand(T, dims) .* 4 .- 2 + labels = round.(rand(T, dims)) .* 2 .- 1 errs_mask = preds.*labels .< one(T) ############################################################ diff --git a/test/layers/multinomial-logistic-loss.jl b/test/layers/multinomial-logistic-loss.jl index fca6aa8..6495e83 100644 --- a/test/layers/multinomial-logistic-loss.jl +++ b/test/layers/multinomial-logistic-loss.jl @@ -45,7 +45,7 @@ function test_multinomial_logistic_loss_layer(backend::Backend, tensor_dim, clas weights = repeat(reshape(weights, new_shape...), inner=rep_shape) end if class_weights[2] == :local - weights = weights .* (channels ./ sum(weights,op_dim)) + weights = weights .* (channels ./ sum(weights, dims=op_dim)) elseif class_weights[2] == :global weights = weights * (prod(dims[1:end-1]) / sum(weights)) else diff --git a/test/layers/softlabel-softmax-loss.jl b/test/layers/softlabel-softmax-loss.jl index ef8eaac..1229886 100644 --- a/test/layers/softlabel-softmax-loss.jl +++ b/test/layers/softlabel-softmax-loss.jl @@ -5,12 +5,12 @@ function test_softlabel_softmax_loss_layer(backend::Backend, tensor_dim, T, eps) op_dim = max(abs(rand(Int)) % tensor_dim, 1) println(" > $dims (operate on dimension $op_dim)") - input = rand(T, dims) + convert(T, 0.01) + input = rand(T, dims) .+ convert(T, 0.01) input_blob = make_blob(backend, input) diff_blob = make_blob(backend, T, size(input)) - labels = abs.(rand(T, dims)) + convert(T, 0.01) - labels = labels ./ sum(labels, op_dim) + labels = abs.(rand(T, dims)) .+ convert(T, 0.01) + labels = labels ./ sum(labels, dims=op_dim) label_blob = make_blob(backend, labels) inputs = Blob[input_blob, label_blob] @@ -33,10 +33,10 @@ function test_softlabel_softmax_loss_layer(backend::Backend, tensor_dim, T, eps) for i = 1:dim_pre for j = 1:dim_post pred = exp.(canonical_input[i,:,j]) - pred /= sum(pred) + pred ./= sum(pred) expected_loss += sum(-log.(pred) .* canonical_label[i,:,j]) - canonical_grad[i,:,j] = repmat(vec(pred), 1, dim_prob) * vec(canonical_label[i,:,j]) + canonical_grad[i,:,j] = repeat(vec(pred), 1, dim_prob) * vec(canonical_label[i,:,j]) canonical_grad[i,:,j] -= canonical_label[i,:,j] end end diff --git a/test/layers/softmax-loss.jl b/test/layers/softmax-loss.jl index 2e8c554..9a9966f 100644 --- a/test/layers/softmax-loss.jl +++ b/test/layers/softmax-loss.jl @@ -12,14 +12,14 @@ function test_softmax_loss_layer(backend::Backend, tensor_dim, use_weights::Bool dims_label = tuple(dims_label...) 
println(" > $dims (operate on dimension $op_dim)") - input = rand(T, dims) + convert(T, 0.01) + input = rand(T, dims) .+ convert(T, 0.01) input_blob = make_blob(backend, input) diff_blob = make_blob(backend, T, size(input)) if use_weights - weights = rand(T, dims[1:end-1]) + convert(T, 0.1) - weights = weights .* (dims[op_dim] ./ sum(weights,op_dim)) + weights = rand(T, dims[1:end-1]) .+ convert(T, 0.1) + weights = weights .* (dims[op_dim] ./ sum(weights, dims=op_dim)) else weights = [] end diff --git a/test/layers/square-loss.jl b/test/layers/square-loss.jl index 85a5d1b..767efa4 100644 --- a/test/layers/square-loss.jl +++ b/test/layers/square-loss.jl @@ -1,3 +1,5 @@ +import LinearAlgebra: norm + function test_square_loss_layer(backend::Backend, T, eps) println("-- Testing SquareLossLayer on $(typeof(backend)){$T}...") @@ -26,7 +28,7 @@ function test_square_loss_layer(backend::Backend, T, eps) forward(backend, state, inputs) - loss = 0.5*vecnorm(preds-labels)^2 / dims[end] + loss = 0.5*norm(preds-labels)^2 / dims[end] @test -eps < loss-state.loss < eps backward(backend, state, inputs, diffs) diff --git a/test/layers/tied-inner-product.jl b/test/layers/tied-inner-product.jl index 094fb5f..b70a85d 100644 --- a/test/layers/tied-inner-product.jl +++ b/test/layers/tied-inner-product.jl @@ -48,7 +48,7 @@ function test_tied_inner_product_layer(backend::Backend, n_input, T, eps) backward(net) bias_grad = to_array(net.states[3].∇b) - bias_grad_expected = sum([sum(top_diffs[i],2) for i = 1:n_input]) + bias_grad_expected = sum([sum(top_diffs[i], dims=2) for i = 1:n_input]) @test all(abs.(bias_grad - bias_grad_expected) .< eps) for i = 1:n_input diff --git a/test/runtests.jl b/test/runtests.jl index 7d8a19e..58abf2c 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -26,6 +26,7 @@ end # or modifying unit-tests function test_dir(dir) map(reverse(Mocha.glob(dir, r".*\.jl$", sort_by=:mtime))) do file + println("-- $dir/$file") include("$dir/$file") end end @@ -33,8 +34,8 @@ end ############################################################ # Solvers ############################################################ -#+ include("solvers/test-adam-solver.jl") -#+ include("solvers/test-sgd-solver.jl") +include("solvers/test-adam-solver.jl") +include("solvers/test-sgd-solver.jl") ############################################################ # Network From ef1f968decdff71bdb703488e6ca9dbae6ad0fc6 Mon Sep 17 00:00:00 2001 From: Chiyuan Zhang Date: Wed, 21 Nov 2018 19:37:35 -0800 Subject: [PATCH 24/24] fix unit test for solvers --- src/coffee-break.jl | 2 +- src/solvers.jl | 4 ++-- src/solvers/adadelta.jl | 4 ++-- src/solvers/adagrad.jl | 4 ++-- src/solvers/adam.jl | 2 +- src/solvers/nesterov.jl | 4 ++-- src/solvers/sgd.jl | 4 ++-- test/runtests.jl | 26 +++++++++++++------------- test/solvers/test-sgd-solver.jl | 4 +++- 9 files changed, 28 insertions(+), 26 deletions(-) diff --git a/src/coffee-break.jl b/src/coffee-break.jl index 45e1332..7028047 100644 --- a/src/coffee-break.jl +++ b/src/coffee-break.jl @@ -38,7 +38,7 @@ using HDF5, JLD const StatisticsValue = AbstractFloat const StatisticsRecords = Dict{Int, StatisticsValue} -struct CoffeeLounge +mutable struct CoffeeLounge filename :: AbstractString save_every_n_iter :: Int file_exists :: Symbol # :overwrite, :panic, :merge diff --git a/src/solvers.jl b/src/solvers.jl index 6e4aed3..ce8a683 100644 --- a/src/solvers.jl +++ b/src/solvers.jl @@ -10,7 +10,7 @@ import Base.Meta: quot @compat abstract type InternalSolverState end # All the state a solver needs to 
update an iteration const SolverParameters = Dict{Symbol,Any} -struct Solver{T<:SolverMethod} +mutable struct Solver{T<:SolverMethod} method :: T params :: SolverParameters coffee_lounge :: Any # forward declaration @@ -21,7 +21,7 @@ Solver(method::T, params::SolverParameters) where {T} = begin Solver(method, params, CoffeeLounge()) end -struct SolverState{T<:InternalSolverState} +mutable struct SolverState{T<:InternalSolverState} iter :: Int obj_val :: Float64 losses :: Dict diff --git a/src/solvers/adadelta.jl b/src/solvers/adadelta.jl index c29455e..b11ce96 100644 --- a/src/solvers/adadelta.jl +++ b/src/solvers/adadelta.jl @@ -13,13 +13,13 @@ make_solver_parameters(method::Adadelta; kwargs...) = validate_parameters(method::Adadelta, params::SolverParameters) = validate_parameters(params, :rho, :eps) -struct AdadeltaSolverState <: InternalSolverState +mutable struct AdadeltaSolverState <: InternalSolverState param_states :: Vector{LayerState} gradients_sq :: Vector{Vector{Blob}} deltas_sq :: Vector{Vector{Blob}} end -struct AdadeltaSolverSnapshot <: SolverStateSnapshot +mutable struct AdadeltaSolverSnapshot <: SolverStateSnapshot iteration :: Int obj_val :: Float64 end diff --git a/src/solvers/adagrad.jl b/src/solvers/adagrad.jl index 4e72838..73e736a 100644 --- a/src/solvers/adagrad.jl +++ b/src/solvers/adagrad.jl @@ -15,12 +15,12 @@ make_solver_parameters(method::Adagrad; kwargs...)= validate_parameters(method::Adagrad, params::SolverParameters) = validate_parameters(params, :gamma, :epsilon) -struct AdagradSolverState <: InternalSolverState +mutable struct AdagradSolverState <: InternalSolverState param_states :: Vector{LayerState} param_history :: Vector{Vector{Blob}} end -struct AdagradSolverSnapshot <: SolverStateSnapshot +mutable struct AdagradSolverSnapshot <: SolverStateSnapshot iteration :: Int obj_val :: Float64 end diff --git a/src/solvers/adam.jl b/src/solvers/adam.jl index 27bbdd5..ead76cb 100644 --- a/src/solvers/adam.jl +++ b/src/solvers/adam.jl @@ -16,7 +16,7 @@ validate_parameters(solver::Adam, params::SolverParameters) = begin validate_parameters(params, :lr_policy, :beta1, :beta2, :epsilon) end -struct AdamSolverState <: InternalSolverState +mutable struct AdamSolverState <: InternalSolverState param_states :: Vector{LayerState} grad_1st_moment_est :: Vector{Vector{Blob}} # Exponentially weighted moving average - biased estimate of 1st moment of gradient grad_2nd_moment_est :: Vector{Vector{Blob}} # Exponentially weighted moving average - biased estimate of raw 2nd moment of gradient diff --git a/src/solvers/nesterov.jl b/src/solvers/nesterov.jl index d47caec..214583d 100644 --- a/src/solvers/nesterov.jl +++ b/src/solvers/nesterov.jl @@ -7,7 +7,7 @@ struct Nesterov <: SolverMethod end -struct NesterovSolverState <: InternalSolverState +mutable struct NesterovSolverState <: InternalSolverState learning_rate :: Float64 momentum :: Float64 param_states :: Vector{LayerState} @@ -15,7 +15,7 @@ struct NesterovSolverState <: InternalSolverState last_momentum :: Float64 end -struct NesterovSolverSnapshot <: SolverStateSnapshot +mutable struct NesterovSolverSnapshot <: SolverStateSnapshot iteration :: Int obj_val :: Float64 learning_rate :: Float64 diff --git a/src/solvers/sgd.jl b/src/solvers/sgd.jl index 2cbf47b..416d6e4 100644 --- a/src/solvers/sgd.jl +++ b/src/solvers/sgd.jl @@ -10,7 +10,7 @@ make_solver_parameters(method::SGD; kwargs...) 
= merge(make_solver_parameters(), defaultDict, SolverParameters(kwargs)) -struct SGDSolverState <: InternalSolverState +mutable struct SGDSolverState <: InternalSolverState learning_rate :: Float64 momentum :: Float64 param_states :: Vector{LayerState} @@ -18,7 +18,7 @@ struct SGDSolverState <: InternalSolverState last_momentum :: Float64 end -struct SGDSolverSnapshot <: SolverStateSnapshot +mutable struct SGDSolverSnapshot <: SolverStateSnapshot iteration :: Int obj_val :: Float64 learning_rate :: Float64 diff --git a/test/runtests.jl b/test/runtests.jl index 58abf2c..510bffc 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -40,16 +40,16 @@ include("solvers/test-sgd-solver.jl") ############################################################ # Network ############################################################ -#- include("net/topology.jl") -#- include("net/test-gradient-simple.jl") +include("net/topology.jl") +include("net/test-gradient-simple.jl") ############################################################ # Utilities functions ############################################################ -#- include("utils/ref-count.jl") -#- include("utils/glob.jl") -#- include("utils/blas.jl") -#- include("utils/blob-reshape.jl") +include("utils/ref-count.jl") +include("utils/glob.jl") +include("utils/blas.jl") +include("utils/blob-reshape.jl") if test_gpu include("cuda/padded-copy.jl") @@ -61,21 +61,21 @@ end ############################################################ # Activation Functions ############################################################ -#- include("neurons/relu.jl") -#- include("neurons/sigmoid.jl") -#- include("neurons/tanh.jl") -#- include("neurons/exponential.jl") +include("neurons/relu.jl") +include("neurons/sigmoid.jl") +include("neurons/tanh.jl") +include("neurons/exponential.jl") ############################################################ # Regularizers ############################################################ -#- include("regularizers/l2.jl") -#- include("regularizers/l1.jl") +include("regularizers/l2.jl") +include("regularizers/l1.jl") ############################################################ # Constraints ############################################################ -#- include("constraints/l2.jl") +include("constraints/l2.jl") ############################################################ # Data Transformers diff --git a/test/solvers/test-sgd-solver.jl b/test/solvers/test-sgd-solver.jl index 8243981..4754782 100644 --- a/test/solvers/test-sgd-solver.jl +++ b/test/solvers/test-sgd-solver.jl @@ -1,7 +1,9 @@ +import Random + function test_sgd_solver(backend) println("-- Testing simple SGD solver call") registry_reset(backend) - srand(12345678) + Random.seed!(12345678) ############################################################ # Prepare Random Data ############################################################
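
A minimal standalone sketch (not part of the patches above) of the Julia 0.6 -> 1.0 replacements these commits apply. It assumes Julia 1.x; the variable and type names below are illustrative only, not taken from the Mocha.jl sources:

    using Random, LinearAlgebra

    Random.seed!(12345678)                  # was: srand(12345678)

    x = rand(Float32, 4, 3)
    preds   = x .* 4 .- 2                   # was: x*4-2 (scalar/array arithmetic now needs dots)
    n_wrong = count(!iszero, preds .> 0)    # was: countnz(preds .> 0)
    colsums = sum(x, dims=1)                # was: sum(x, 1)
    stacked = cat(x, x, dims=3)             # was: cat(3, x, x)
    tiled   = repeat(vec(x), 1, 2)          # was: repmat(vec(x), 1, 2)
    nrm     = norm(x)                       # was: vecnorm(x); norm now comes from LinearAlgebra

    # Types whose fields are reassigned after construction (as with the solver
    # and coffee-lounge state types patched above) must be declared mutable:
    mutable struct SolverStateSketch        # was: struct SolverStateSketch
        iter::Int
    end

The common thread across the test-suite diffs is that reduction and concatenation functions now take the dimension as a `dims` keyword rather than a positional argument, and implicit scalar broadcasting over arrays is gone, so elementwise operations need explicit dots.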