Merged
1 change: 1 addition & 0 deletions src/Mocha.jl
@@ -15,6 +15,7 @@ include("exception.jl")
include("utils/blas.jl")
include("utils/math.jl")
include("utils/io.jl")
include("utils/tensor.jl")

if Config.use_native_extension
include("utils/im2col-native.jl")
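Note: the newly included src/utils/tensor.jl presumably defines the split_dims helper used throughout the CUDA and CPU changes below. A minimal sketch of its assumed semantics (not the actual file contents): it folds everything before and after an operating dimension into two flat factors.

    # Hypothetical sketch of split_dims, assuming column-major layout:
    # collapse the dimensions before and after op_dim into single factors.
    function split_dims_sketch(blob, op_dim::Int)
      dims = size(blob)
      (prod(dims[1:op_dim-1]), dims[op_dim], prod(dims[op_dim+1:end]))
    end

    # For a (width, height, channels, num) blob with op_dim = 3 this recovers
    # the old (spatial_dim, channels, num) triple used by the kernels below:
    # split_dims_sketch(rand(5, 4, 3, 2), 3) == (20, 3, 2)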
12 changes: 6 additions & 6 deletions src/cuda/layers/accuracy.jl
@@ -1,6 +1,7 @@
function setup_etc(backend::GPUBackend, layer::AccuracyLayer, inputs)
width, height, channels, num = get_whcn(inputs[1])
etc = make_blob(backend, eltype(inputs[1]), (width,height,1,num))
function setup_etc(backend::GPUBackend, layer::AccuracyLayer, op_dim::Int, inputs)
dims = [size(inputs[1])...]
dims[op_dim] = 1
etc = make_blob(backend, eltype(inputs[1]), dims...)
return etc
end
function shutdown(backend::GPUBackend, state::AccuracyLayerState)
@@ -11,8 +12,7 @@ function forward(backend::GPUBackend, state::AccuracyLayerState, inputs::Vector{
pred = inputs[1]
label = inputs[2]

width, height, channels, num = get_whcn(pred)
spatial_dim = width*height
spatial_dim, pred_dim, num = split_dims(pred, state.op_dim)
data_type = eltype(pred)

x_block = int(ceil(float64(num)/CUDA.THREADS_PER_BLOCK_X));
@@ -26,7 +26,7 @@ function forward(backend::GPUBackend, state::AccuracyLayerState, inputs::Vector{
error("Unsupported data type $data_type")
end
CUDA.launch(kernel, (x_block,y_block),(CUDA.THREADS_PER_BLOCK_X,CUDA.THREADS_PER_BLOCK_Y),
(pred.ptr.p, label.ptr.p, state.etc.ptr.p, num, channels, spatial_dim));
(pred.ptr.p, label.ptr.p, state.etc.ptr.p, num, pred_dim, spatial_dim));

N = num * spatial_dim
accuracy = CuBLAS.dot(backend.cublas_ctx, data_type, N, state.etc.ptr, 1, state.etc.ptr, 1)
3 changes: 1 addition & 2 deletions src/cuda/layers/argmax.jl
@@ -3,8 +3,7 @@ function forward(backend::GPUBackend, state::ArgmaxLayerState, inputs::Vector{Bl
input = inputs[i]
output = state.blobs[i]

width, height, channels, num = get_whcn(input)
spatial_dim = width*height
spatial_dim, channels, num = split_dims(input, state.dims[i])
data_type = eltype(input)

x_block = int(ceil(float64(num)/CUDA.THREADS_PER_BLOCK_X));
44 changes: 19 additions & 25 deletions src/cuda/layers/channel-pooling.jl
@@ -1,16 +1,14 @@
function setup_etc(backend::GPUBackend, layer::ChannelPoolingLayer, inputs, pooled_chann)
function setup_etc(backend::GPUBackend, layer::ChannelPoolingLayer, inputs, blobs)
if isa(layer.pooling, Pooling.Max)
masks = Array(CuPtr, length(inputs))
for i = 1:length(inputs)
masks[i] = CUDA.cualloc(Csize_t, get_width(inputs[i]) * get_height(inputs[i]) *
pooled_chann[i] * get_num(inputs[i]))
masks[i] = CUDA.cualloc(Csize_t, length(blobs[i]))
end
etc = masks
elseif isa(layer.pooling, Pooling.Mean)
integrals = Array(CuPtr, length(inputs))
for i = 1:length(inputs)
integrals[i] = CUDA.cualloc(eltype(inputs[i]), get_width(inputs[i]) * get_height(inputs[i]) *
get_chann(inputs[i]))
integrals[i] = CUDA.cualloc(eltype(inputs[i]), prod(size(inputs[i])[1:end-1]))
end
etc = integrals
else
@@ -39,9 +37,9 @@ function forward(backend::GPUBackend, pool::StdPoolingFunction,
output = state.blobs[i]

if isa(pool, Pooling.Max)
cuda_max_channel_pooling_forward(backend, input, output, state.etc[i], state.layer)
cuda_max_channel_pooling_forward(backend, input, output, state.etc[i], state.layer, state.op_dims[i])
elseif isa(pool, Pooling.Mean)
cuda_mean_channel_pooling_forward(backend, input, output, state.etc[i], state.layer)
cuda_mean_channel_pooling_forward(backend, input, output, state.etc[i], state.layer, state.op_dims[i])
else
error("Pooling for $pool not implemented yet")
end
@@ -59,9 +57,9 @@ function backward(backend::GPUBackend, pool::StdPoolingFunction, state::ChannelP
diff = diffs[i]
if !isa(diff, NullBlob)
if isa(pool, Pooling.Max)
cuda_max_channel_pooling_backward(backend, diff, state.blobs_diff[i], state.etc[i], state.layer)
cuda_max_channel_pooling_backward(backend, diff, state.blobs_diff[i], state.etc[i], state.layer, state.op_dims[i])
elseif isa(pool, Pooling.Mean)
cuda_mean_channel_pooling_backward(backend, diff, state.blobs_diff[i], state.layer)
cuda_mean_channel_pooling_backward(backend, diff, state.blobs_diff[i], state.layer, state.op_dims[i])
else
error("Pooling for $pool not implemented yet")
end
@@ -72,15 +70,14 @@ function backward(backend::GPUBackend, pool::StdPoolingFunction, state::ChannelP
end

function cuda_mean_channel_pooling_forward{T}(backend::GPUBackend, input::CuTensorBlob{T},
output::CuTensorBlob{T}, integral::CuPtr, layer)
output::CuTensorBlob{T}, integral::CuPtr, layer, op_dim)

width, height, channels, num = size(input)
pooled_chann = size(output, 3)
spatial_dim_T, channels, num = split_dims(input, op_dim)
pooled_chann = size(output, op_dim)
one = convert(T, 1)
neg_one = convert(T, -1)
scale = convert(T, 1.0/layer.kernel)

spatial_dim_T = width*height
spatial_dim = spatial_dim_T * sizeof(T)
fea_dim = spatial_dim * channels
output_fea_dim = spatial_dim * pooled_chann
@@ -118,15 +115,14 @@ function cuda_mean_channel_pooling_forward{T}(backend::GPUBackend, input::CuTens
end

function cuda_mean_channel_pooling_backward{T}(backend::GPUBackend, input::CuTensorBlob{T},
output::CuTensorBlob{T}, layer)
output::CuTensorBlob{T}, layer, op_dim)

width, height, channels, num = size(input)
pooled_chann = size(output, 3)
spatial_dim_T, channels, num = split_dims(input, op_dim)
pooled_chann = size(output, op_dim)
scale = 1/convert(T, layer.kernel)

fill!(input, 0)

spatial_dim_T = width*height
spatial_dim = spatial_dim_T * sizeof(T)
fea_dim = spatial_dim * channels
output_fea_dim = spatial_dim * pooled_chann
@@ -159,11 +155,10 @@ function cuda_geometry_max_chann_pool(sp_dim::Int, num::Int)

end
function cuda_max_channel_pooling_forward{T}(backend::GPUBackend, input::CuTensorBlob{T},
output::CuTensorBlob{T}, mask::CuPtr, layer)
output::CuTensorBlob{T}, mask::CuPtr, layer, op_dim)

width, height, channels, num = size(input)
sp_dim = width*height
pooled_chann = get_chann(output)
sp_dim, channels, num = split_dims(input, op_dim)
pooled_chann = size(output, op_dim)

cuda_dim = cuda_geometry_max_chann_pool(sp_dim, num);
if T == Float32
@@ -179,11 +174,10 @@ function cuda_max_channel_pooling_forward{T}(backend::GPUBackend, input::CuTenso
end

function cuda_max_channel_pooling_backward{T}(backend::GPUBackend, input::CuTensorBlob{T},
output::CuTensorBlob{T}, mask::CuPtr, layer)
output::CuTensorBlob{T}, mask::CuPtr, layer, op_dim)

width, height, channels, num = size(input)
sp_dim = width*height
pooled_chann = get_chann(output)
sp_dim, channels, num = split_dims(input, op_dim)
pooled_chann = size(output, op_dim)

cuda_dim = cuda_geometry_max_chann_pool(sp_dim, num);
if T == Float32
4 changes: 1 addition & 3 deletions src/cuda/layers/multinomial-logistic-loss.jl
@@ -3,9 +3,7 @@ function forward(backend::GPUBackend, state::MultinomialLogisticLossLayerState,
label = inputs[2]
data_type = eltype(pred)

width, height, channels, num = get_whcn(pred)

spatial_dim = height*width
spatial_dim, channels, num = split_dims(pred, state.op_dim)
prob_dim = channels

x_block = int(ceil(float64(num)/CUDA.THREADS_PER_BLOCK_X))
4 changes: 1 addition & 3 deletions src/cuda/layers/softmax-loss.jl
@@ -4,9 +4,7 @@ function backward(backend::GPUBackend, state::SoftmaxLossLayerState, inputs::Vec
copy!(diff, state.softmax.blobs[1])

data_type = eltype(diff)
height, width, channels, num = get_whcn(diff)

spatial_dim = height*width
spatial_dim, channels, num = split_dims(diff, state.logistic.op_dim)
prob_dim = channels

x_block = int(ceil(float64(num)/CUDA.THREADS_PER_BLOCK_X))
7 changes: 4 additions & 3 deletions src/cuda/layers/softmax.jl
@@ -3,12 +3,13 @@ type CuDNNSoftmaxState
outputs_desc :: Vector{CuDNN.Tensor4dDescriptor}
end

function setup_etc(backend::GPUBackend, layer::SoftmaxLayer, data_type, inputs)
function setup_etc(backend::GPUBackend, layer::SoftmaxLayer, dims::Vector{Int}, data_type, inputs)
inputs_desc = Array(CuDNN.Tensor4dDescriptor, length(inputs))
outputs_desc = Array(CuDNN.Tensor4dDescriptor, length(inputs))
for i = 1:length(inputs)
inputs_desc[i] = CuDNN.create_tensor4d_descriptor(data_type, get_whcn(inputs[i]))
outputs_desc[i] = CuDNN.create_tensor4d_descriptor(data_type, get_whcn(inputs[i]))
dim_sp, dim_prob, dim_num = split_dims(inputs[i], dims[i])
inputs_desc[i] = CuDNN.create_tensor4d_descriptor(data_type, (1,dim_sp,dim_prob,dim_num))
outputs_desc[i] = CuDNN.create_tensor4d_descriptor(data_type, (1,dim_sp,dim_prob,dim_num))
end
etc = CuDNNSoftmaxState(inputs_desc, outputs_desc)
return etc
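Note: the descriptor change above maps a softmax over an arbitrary dimension onto cuDNN's fixed 4-d layout by placing the softmax dimension in the "channel" slot. A small illustration of that shape mapping with made-up sizes (not part of the PR):

    # Hypothetical sizes: softmax over dimension 3 of a (7, 5, 10, 8) input.
    input_size = (7, 5, 10, 8)
    dim        = 3
    dim_sp     = prod(input_size[1:dim-1])    # 35, everything before dim
    dim_prob   = input_size[dim]              # 10, the softmax dimension
    dim_num    = prod(input_size[dim+1:end])  #  8, everything after dim
    desc_shape = (1, dim_sp, dim_prob, dim_num)  # == (1, 35, 10, 8)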
29 changes: 17 additions & 12 deletions src/layers/accuracy.jl
@@ -1,6 +1,7 @@
@defstruct AccuracyLayer Layer (
name :: String = "accuracy",
report_error :: Bool = false,
(dim :: Int = -2, dim != 0),
(bottoms :: Vector{Symbol} = Symbol[], length(bottoms) == 2),
)
@characterize_layer(AccuracyLayer,
@@ -11,18 +12,24 @@
type AccuracyLayerState <: LayerState
layer :: AccuracyLayer

op_dim :: Int
accuracy :: Float64
n_accum :: Int
etc :: Any
end

function setup_etc(backend::CPUBackend, layer::AccuracyLayer, inputs)
function setup_etc(backend::CPUBackend, layer::AccuracyLayer, op_dim::Int, inputs)
nothing
end

function setup(backend::Backend, layer::AccuracyLayer, inputs::Vector{Blob}, diffs::Vector{Blob})
etc = setup_etc(backend, layer, inputs)
return AccuracyLayerState(layer, 0.0, 0, etc)
total_dim = ndims(inputs[1])
dim = layer.dim < 0 ? layer.dim + total_dim + 1 : layer.dim
@assert 1 <= dim <= total_dim
@assert dim != total_dim

etc = setup_etc(backend, layer, dim, inputs)
return AccuracyLayerState(layer, dim, 0.0, 0, etc)
end
function shutdown(backend::CPUBackend, state::AccuracyLayerState)
end
@@ -48,20 +55,18 @@ function forward(backend::CPUBackend, state::AccuracyLayerState, inputs::Vector{
pred = inputs[1].data
label = inputs[2].data

width, height, channels, num = get_whcn(pred)
canonical_pred = reshape(pred, (width,height,channels,num))
canonical_label = reshape(label, (width,height,1,num))
dim_pre, dim_prob, dim_post = split_dims(pred, state.op_dim)

accuracy = 0.0
for w = 1:width
for h = 1:height
for n = 1:num
if int(canonical_label[w,h,1,n])+1 == indmax(canonical_pred[w,h,:,n])
accuracy += 1.0
end
for i = 0:dim_pre-1
for j = 0:dim_post-1
idx = Int[i + dim_pre*(k + dim_prob*j) for k=0:dim_prob-1] + 1
@inbounds if int(label[i + dim_pre*j + 1])+1 == indmax(pred[idx])
accuracy += 1.0
end
end
end

state.accuracy = float64(state.accuracy * state.n_accum + accuracy) / (state.n_accum + length(label))
state.n_accum += length(label)
end
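Note: the rewritten loop gathers, for each fixed (pre, post) coordinate pair, the linear indices of all entries along the prediction dimension. A worked example with hypothetical sizes, for illustration only:

    # Suppose pred has size (2, 3, 4) and op_dim = 2, so split_dims gives
    # dim_pre = 2, dim_prob = 3, dim_post = 4.  For i = 1, j = 0 the
    # comprehension yields
    #   Int[1 + 2*(k + 3*0) for k = 0:2] + 1  ==  [2, 4, 6]
    # which, in column-major layout, are exactly the three class scores that
    # share pre-index 1 and post-index 0; indmax over that slice is compared
    # against the zero-based label at linear index i + dim_pre*j + 1 == 2.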
47 changes: 27 additions & 20 deletions src/layers/argmax.jl
@@ -1,45 +1,52 @@
@defstruct ArgmaxLayer Layer (
name :: String = "argmax",
(dim :: Int = -2, dim != 0),
(tops :: Vector{Symbol} = Symbol[], length(tops) > 0),
(bottoms :: Vector{Symbol} = Symbol[], length(bottoms) == length(tops)),
)

type ArgmaxLayerState <: LayerState
layer :: ArgmaxLayer
blobs :: Vector{Blob}

dims :: Vector{Int}
end

function setup(backend::Backend, layer::ArgmaxLayer, inputs::Vector{Blob}, diffs::Vector{Blob})
blobs = map(inputs) do input
width, height, channels, num = get_whcn(input)
data_type = eltype(input)

blob = make_blob(backend, data_type, width, height, 1, num)
blob
dims = Array(Int, length(inputs))
blobs = Array(Blob, length(inputs))
for i = 1:length(inputs)
total_dim = ndims(inputs[i])
dim = layer.dim < 0 ? layer.dim + total_dim + 1 : layer.dim
@assert 1 <= dim <= total_dim
@assert dim != total_dim
dims[i] = dim
shape = [size(inputs[i])...]
shape[dim] = 1
blobs[i] = make_blob(backend, eltype(inputs[i]), shape...)
end

return ArgmaxLayerState(layer, blobs)
return ArgmaxLayerState(layer, blobs, dims)
end

function forward(backend::CPUBackend, state::ArgmaxLayerState, inputs::Vector{Blob})
for i = 1:length(inputs)
input = inputs[i].data
output = state.blobs[i].data
width, height, channels, num = get_whcn(input)
canonical_input = reshape(input, (width,height,channels,num))
for n = 1:num
for w = 1:width
for h = 1:height
maxc = 1; maxval = canonical_input[w,h,maxc,n]
for c = 2:channels
@inbounds val = canonical_input[w,h,c,n]
if val > maxval
maxval = val
maxc = c
end
pre_dim, mid_dim, post_dim = split_dims(input, state.dims[i])
for x = 0:pre_dim-1
for z = 0:post_dim-1
idx = Int[x + pre_dim*(y + mid_dim*z) for y=0:mid_dim-1] + 1
maxc = 1
@inbounds maxval = input[idx[1]]
for y = 2:length(idx)
@inbounds val = input[idx[y]]
if val > maxval
maxval = val
maxc = y
end
@inbounds output[w,h,1,n] = maxc-1
end
@inbounds output[x + pre_dim*z + 1] = maxc-1
end
end
end
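Note: both the accuracy and argmax setup code above resolve a possibly negative dim (counted from the end, so the default -2 selects the second-to-last dimension) and forbid operating over the trailing mini-batch dimension. A hypothetical helper mirroring that logic, for illustration only:

    # Resolve a possibly negative operating dimension against an N-d input.
    function resolve_op_dim(layer_dim::Int, total_dim::Int)
      dim = layer_dim < 0 ? layer_dim + total_dim + 1 : layer_dim
      @assert 1 <= dim <= total_dim
      @assert dim != total_dim   # the last dimension is the mini-batch axis
      return dim
    end

    # With a 4-d (width, height, channels, num) blob the default dim = -2
    # resolves to the channel dimension: resolve_op_dim(-2, 4) == 3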