
Merge pull request #51 from PumasAI/docstring
Add Docstrings
ChrisRackauckas authored Apr 14, 2022
2 parents c4a4d74 + 8a8556c commit 2485545
Showing 14 changed files with 225 additions and 16 deletions.
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "SimpleChains"
uuid = "de6bee2f-e2f4-4ec7-b6ed-219cc6f6e9e5"
authors = ["Chris Elrod <[email protected]> and contributors"]
version = "0.2.1"
version = "0.2.2"

[deps]
ArrayInterface = "4fba245c-0d91-5ea0-9b3e-6abc04ee57a9"
2 changes: 1 addition & 1 deletion README.md
@@ -15,7 +15,7 @@ using SimpleChains, BenchmarkTools
x = rand(24, 200); # 24 inputs per 200 observations

# 2 responses each per 200 observations
y = StrideArray{Float64}(undef, (static(2),200)) .= randn.() .* 10;
y = Matrix{Float64}(undef, 2, 200) .= randn.() .* 10;

schain = SimpleChain(
static(24), # input dimension (optional)
5 changes: 5 additions & 0 deletions src/activation.jl
@@ -1,7 +1,12 @@


# Elementwise transforms
"""
Activation(activation)
Applies the `activation` function elementwise.
"""
struct Activation{F}
f::F
end
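For reference, a quick usage sketch of the `Activation` layer documented above (not part of this diff; the chain layout and sizes are hypothetical):

```julia
using SimpleChains

# A linear layer without a fused activation, followed by a standalone
# elementwise `tanh` via `Activation`.
chain = SimpleChain(
  static(4),                      # hypothetical input dimension
  TurboDense(identity, 8),
  SimpleChains.Activation(tanh),
  TurboDense(identity, 2),
)
p = SimpleChains.init_params(chain)
chain(rand(Float32, 4, 16), p)    # 16 hypothetical observations
```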
12 changes: 12 additions & 0 deletions src/conv.jl
@@ -680,6 +680,18 @@ function convlayeradjA!(
end
end

"""
Conv(activation, dims::Tuple{Vararg{Integer}}, outputdim::Integer)
Performs a convolution with `dims` and maps it to `outputdim` output channels, then
adds a bias (one per `outputdim`) and applies `activation` elementwise.
E.g., `Conv(relu, (5, 5), 16)` performs a `5 × 5` convolution, and maps the input
channels to 16 output channels, before adding a bias and applying `relu`.
Weights are randomly initialized using the (Xavier) Glorot uniform distribution.
The bias is zero-initialized.
"""
struct Conv{F,D<:Tuple{Vararg{Integer}},O<:Integer}
dim::D
outputdim::O
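To make the `Conv` docstring concrete, here is a small convolutional chain (not part of this diff; the image size, channel counts, and batch size are made up for the sketch):

```julia
using SimpleChains

convchain = SimpleChain(
  (static(28), static(28), static(1)),              # 28×28 input, 1 channel
  SimpleChains.Conv(SimpleChains.relu, (5, 5), 6),  # 5×5 kernel, 6 output channels
  SimpleChains.MaxPool(2, 2),
  SimpleChains.Flatten(3),
  TurboDense(identity, 10),
)
p = SimpleChains.init_params(convchain)  # Glorot-uniform conv weights, zero biases
x = rand(Float32, 28, 28, 1, 4)          # hypothetical batch of 4 images
convchain(x, p)
```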
8 changes: 7 additions & 1 deletion src/dense.jl
@@ -1,8 +1,14 @@

"""
TurboDense{B}(outputdim, activation)
TurboDense{B=true}(activation, outputdim::Integer)
Linear (dense) layer.
- `B` specifies whether the layer includes a bias term.
- The `activation` function is applied elementwise to the result.
- `outputdim` indicates how many dimensions the input is mapped to.
Weights are randomly initialized using the (Xavier) Glorot normal distribution.
The bias is zero-initialized.
"""
struct TurboDense{B,I<:Integer,F}
f::F
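A brief sketch of the two `TurboDense` forms described in the docstring above (the `B` type parameter toggles the bias; layer sizes are hypothetical and not part of this diff):

```julia
using SimpleChains

mlp = SimpleChain(
  static(4),
  TurboDense(tanh, 8),             # B defaults to true: weights plus a bias
  TurboDense{false}(identity, 2),  # no bias term on the output layer
)
p = SimpleChains.init_params(mlp)  # Glorot-normal weights, zero-initialized biases
mlp(rand(Float32, 4, 32), p)
```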
2 changes: 1 addition & 1 deletion src/dropout.jl
@@ -1,7 +1,7 @@
using VectorizedRNG

"""
Dropout(p) # 0 < p < 1
Dropout(p) # 0 < p < 1
Dropout layer.
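A minimal sketch of `Dropout` placed between two dense layers (hypothetical sizes; not part of this diff):

```julia
using SimpleChains

chain = SimpleChain(
  static(16),
  TurboDense(SimpleChains.relu, 32),
  SimpleChains.Dropout(0.5),   # drop each activation with probability 0.5
  TurboDense(identity, 4),
)
p = SimpleChains.init_params(chain)
```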
17 changes: 16 additions & 1 deletion src/flatten.jl
@@ -1,5 +1,20 @@


"""
Flatten{N}()
Flattens the first `N` dimensions. E.g.,
```julia
julia> Flatten{2}()(rand(2,3,4))
6×4 Matrix{Float64}:
0.0609115 0.597285 0.279899 0.888223
0.0667422 0.315741 0.351003 0.805629
0.678297 0.350817 0.984215 0.399418
0.125801 0.566696 0.96873 0.57744
0.331961 0.350742 0.59598 0.741998
0.26345 0.144635 0.076433 0.330475
```
"""
struct Flatten{N} end
Flatten(N) = Flatten{convert(Int, N)::Int}()
@generated _dec(::Flatten{N}) where {N} = Flatten{N - 1}()
19 changes: 19 additions & 0 deletions src/loss.jl
@@ -1,6 +1,12 @@
abstract type AbstractLoss{Y} end

has_loss(sc::SimpleChain) = last(sc.layers) isa AbstractLoss
"""
add_loss(chn, l::AbstractLoss)
Add the loss function `l` to the simple chain. The loss function
should hold the target you're trying to fit.
"""
function add_loss(sc::SimpleChain, l::AbstractLoss)
id = chain_input_dims(sc)
if has_loss(sc)
@@ -49,6 +55,11 @@ function layer_output_size(::Val{T}, sl::AbstractLoss, s) where {T}
_layer_output_size_no_temp(Val{T}(), sl, s)
end

"""
SquaredLoss(target)
Calculates half of the mean squared loss relative to the target.
"""
struct SquaredLoss{Y} <: AbstractLoss{Y}
y::Y
end
@@ -93,7 +104,11 @@ function (sl::SquaredLoss{<:AbstractArray{<:Number}})(arg::AbstractArray{T,N}, p
T(0.5/size(arg,N)) * s, p, pu
end

"""
AbsoluteLoss
Calculates the mean absolute loss relative to the target.
"""
struct AbsoluteLoss{Y} <: AbstractLoss{Y}
y::Y
end
@@ -144,7 +159,11 @@ function (sl::AbstractLoss{<:AbstractArray{<:AbstractArray}})(arg, p, pu)
return s, p, pu
end

"""
LogitCrossEntropyLoss
Calculates mean logit cross-entropy loss.
"""
struct LogitCrossEntropyLoss{Y<:Union{AbstractVector{UInt32},Nothing}} <: AbstractLoss{Y}
y::Y
end
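To tie the loss docstrings together, a hedged sketch of attaching losses with `add_loss` (not part of this diff; the data, sizes, and class count are hypothetical):

```julia
using SimpleChains

x = rand(Float32, 3, 100)
y = randn(Float32, 2, 100)

chain = SimpleChain(static(3), TurboDense(tanh, 8), TurboDense(identity, 2))

# Regression: the loss holds the targets, and the chain then returns the loss value.
regloss = SimpleChains.add_loss(chain, SimpleChains.SquaredLoss(y))
p = SimpleChains.init_params(regloss)
regloss(x, p)   # half of the mean squared error

# Classification: LogitCrossEntropyLoss expects class indices as a UInt32 vector.
labels = rand(UInt32(1):UInt32(4), 100)
clschain = SimpleChain(static(3), TurboDense(tanh, 8), TurboDense(identity, 4))
clsloss = SimpleChains.add_loss(clschain, SimpleChains.LogitCrossEntropyLoss(labels))
```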
5 changes: 5 additions & 0 deletions src/maxpool.jl
@@ -1,4 +1,9 @@

"""
MaxPool(dims::Tuple{Vararg{Integer}})
Calculates the maximum of pools of size `dims`.
"""
struct MaxPool{D} end
MaxPool(x::Tuple{Vararg{Integer}}) = MaxPool{map(Int, x)}()
MaxPool(x::Vararg{Integer}) = MaxPool{map(Int, x)}()
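Both constructor methods shown directly below the docstring accept the pooling window, e.g.:

```julia
SimpleChains.MaxPool(2, 2)   # equivalent to SimpleChains.MaxPool((2, 2))
```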
92 changes: 89 additions & 3 deletions src/optimize.jl
@@ -1,6 +1,11 @@

abstract type AbstractOptimizer end

"""
ADAM(η = 0.001, β = (0.9, 0.999))
ADAM optimizer.
"""
struct ADAM <: AbstractOptimizer
η::Float64
β::Tuple{Float64,Float64}
@@ -57,7 +62,10 @@ function update!(g::AbstractVector, opt, Xp, layers, pen, sx, p, pm, optbuffer,
end
function chain_valgrad_thread!((g, Xp, layers, p, pm, mpt), start, stop)
batchsize = size(Xp, ndims(Xp))
start > stop && return nothing
if start > stop
fill!(g, zero(eltype(g)))
return nothing
end
off = start - 1
nt = size(g, static(2))
goff = stride(g, static(2)) * sizeof(eltype(g)) * off
@@ -119,7 +127,10 @@ function shuffle_chain_valgrad_thread!(

fm1 = off * batchsize + pstart + min(r, off)
lastdim = batchsize + (start <= r)
((lastdim > 0) & (subrangelen > 0)) || return nothing
if !((lastdim > 0) & (subrangelen > 0))
# fill!(g, 0)
return nothing
end
l = fm1 + lastdim

loss = last(layers)
@@ -167,6 +178,49 @@ function shuffle_update!(
pstop,
)
nthread = size(g, static(2))
#=
batchsize = pstop - pstart
if batchsize < nthread
gpb = preserve_buffer(g)
GC.@preserve gpb begin
if batchsize == 1
gv = PtrArray(pointer(g), (length(p),))
return shuffle_update!(
gv,
opt,
Xp,
layers,
pen,
sx,
p,
pm,
optbuffer,
mpt,
perm,
pstart,
pstop,
)
else
gm = PtrArray(stridedpointer(g), (length(p), batchsize), Val{(true, false)}())
return shuffle_update!(
gm,
opt,
Xp,
layers,
pen,
sx,
p,
pm,
optbuffer,
mpt,
perm,
pstart,
pstop,
)
end
end
end
=#
Polyester.batch(
shuffle_chain_valgrad_thread!,
(nthread, nthread),
@@ -215,7 +269,6 @@ function shuffle_update!(
update!(opt, optbuffer, p, g)
end


function train_unbatched!(g, p, _chn::Chain, X, opt::AbstractOptimizer, t::AbstractArray)
if g isa AbstractMatrix && size(g,2) == 1
gpb = preserve_buffer(g)
@@ -241,6 +294,19 @@ end
end
p
end
"""
train_unbatched!(g::AbstractVecOrMat, p, chn, X, opt, iters)
Train without batching inputs.
Arguments:
- `g`, a pre-allocated gradient buffer. It can be allocated with `similar(p)` (if you want to run single threaded), or `alloc_threaded_grad(chn, size(X))` (the `size(X)` argument is only necessary if the input dimension was not specified when constructing the chain). If a matrix, the number of columns gives how many threads to use. Do not use more threads than the batch size would allow.
- `p` is the parameter vector. It is updated in place. It should be pre-initialized, e.g. with `init_params`/`init_params!`. This is to allow calling `train_unbatched!` several times to train in increments.
- `chn` is the `SimpleChain`. It must include a loss (see `SimpleChains.add_loss`) containing the target information (dependent variables) you're trying to fit.
- `X` the training data input argument (independent variables).
- `opt` is the optimizer. Currently, only `SimpleChains.ADAM` is supported.
- `iters`, how many iterations to train for.
"""
function train_unbatched!(g, p, _chn::Chain, X, opt::AbstractOptimizer, iters::Int)
if g isa AbstractMatrix && size(g,2) == 1
gpb = preserve_buffer(g)
@@ -320,6 +386,20 @@ end
@inline view_slice_last(X::AbstractArray{<:Any,3}, r) = view(X, :, :, r)
@inline view_slice_last(X::AbstractArray{<:Any,4}, r) = view(X, :, :, :, r)
@inline view_slice_last(X::AbstractArray{<:Any,5}, r) = view(X, :, :, :, :, r)
"""
train_batched!(g::AbstractVecOrMat, p, chn, X, opt, iters; batchsize = nothing)
Train while batching the inputs.
Arguments:
- `g`, a pre-allocated gradient buffer. It can be allocated with `similar(p)` (if you want to run single threaded), or `alloc_threaded_grad(chn, size(X))` (the `size(X)` argument is only necessary if the input dimension was not specified when constructing the chain). If a matrix, the number of columns gives how many threads to use. Do not use more threads than the batch size would allow.
- `p` is the parameter vector. It is updated in place. It should be pre-initialized, e.g. with `init_params`/`init_params!`. This is to allow calling `train_batched!` several times to train in increments.
- `chn` is the `SimpleChain`. It must include a loss (see `SimpleChains.add_loss`) containing the target information (dependent variables) you're trying to fit.
- `X` the training data input argument (independent variables).
- `opt` is the optimizer. Currently, only `SimpleChains.ADAM` is supported.
- `iters`, how many iterations to train for.
- `batchsize` keyword argument: the size of the batches to use. If `batchsize = nothing`, it'll try to do a half-decent job of picking the batch size for you. However, this is not well optimized at the moment.
"""
function train_batched!(
g::AbstractVecOrMat,
p::AbstractVector,
@@ -350,6 +430,10 @@ function train_batched!(
else
batchsize
end
if N_bs >= N
train_unbatched!(g, p, _chn, X, opt, iters)
return p
end
tgt_batch_len = tsprod(Base.front(size(tgt))) * N_bs
X_batch_len = tsprod(Base.front(sx)) * N_bs
sxb = (Base.front(sx)..., N_bs)
@@ -383,6 +467,8 @@ function train_batched!(
doff = 0
while true
doffnext = doff + N_bs
# doffnext > N && break
# batchstop = doffnext
batchstop::Int = min(doffnext, N)
# @show doff:batchstop
shuffle_update!(
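A hedged end-to-end sketch of the `ADAM`, `train_unbatched!`, and `train_batched!` docstrings above (not part of this diff; the data, sizes, learning rate, and iteration counts are hypothetical):

```julia
using SimpleChains

x = rand(Float32, 3, 200)
y = randn(Float32, 2, 200)

chain     = SimpleChain(static(3), TurboDense(tanh, 16), TurboDense(identity, 2))
chainloss = SimpleChains.add_loss(chain, SimpleChains.SquaredLoss(y))

p = SimpleChains.init_params(chainloss)
g = similar(p)   # single-threaded gradient buffer; see the docstrings for the threaded form

# Full-batch training for 1_000 iterations.
SimpleChains.train_unbatched!(g, p, chainloss, x, SimpleChains.ADAM(1e-3), 1_000)

# Mini-batch training; `batchsize = nothing` would let the library pick one.
SimpleChains.train_batched!(g, p, chainloss, x, SimpleChains.ADAM(), 5_000; batchsize = 32)
```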
10 changes: 10 additions & 0 deletions src/penalty.jl
@@ -74,6 +74,11 @@ getλ(::NoPenalty) = nothing
@inline apply_penalty(Λ, p, _) = apply_penalty(Λ, p)
@inline apply_penalty!(g, Λ, p, _) = apply_penalty!(g, Λ, p)

"""
L1Penalty(λ)
Applies an L1 penalty of `λ` to the parameters, i.e. penalizing them by their absolute values.
"""
struct L1Penalty{NN,T} <: AbstractPenalty{NN}
chn::NN
λ::T
@@ -109,6 +114,11 @@ function apply_penalty!(
l
end

"""
L2Penalty(λ)
Applies an L2 penalty of `λ` to the parameters, i.e. penalizing them by their squares.
"""
struct L2Penalty{NN,T} <: AbstractPenalty{NN}
chn::NN
λ::T
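A sketch of the penalty constructors documented above; construction follows the docstrings, while wrapping a chain (suggested by the `chn` field visible in the structs) is an assumption about usage, not something shown in this diff:

```julia
using SimpleChains

chain     = SimpleChain(static(3), TurboDense(tanh, 8), TurboDense(identity, 2))
y         = randn(Float32, 2, 50)
chainloss = SimpleChains.add_loss(chain, SimpleChains.SquaredLoss(y))

l1 = SimpleChains.L1Penalty(0.05)   # L1 penalty with λ = 0.05 (absolute values of the parameters)
l2 = SimpleChains.L2Penalty(0.10)   # L2 penalty with λ = 0.10 (squares of the parameters)

# Assumed usage: the penalty wraps the chain it regularizes via its `chn` field.
penalized = SimpleChains.L2Penalty(chainloss, 0.10)
```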
36 changes: 36 additions & 0 deletions src/simple_chain.jl
@@ -2,6 +2,29 @@
struct InputDimUnknown end
const InputDim = Union{InputDimUnknown,Tuple{Vararg{Integer}}}

"""
SimpleChain([inputdim::Union{Integer,Tuple{Vararg{Integer}}}, ] layers)
Construct a SimpleChain. The optional `inputdim` argument allows `SimpleChains` to check
the size of inputs. Making these `static` will allow `SimpleChains` to infer size
and loop bounds at compile time.
Batch size generally should not be included in the `input dim`.
If `inputdim` is not specified, some methods, e.g. `init_params`, will require
passing the size as an additional argument, because the number of parameters may be
a function of the input size (e.g., for a `TurboDense` layer).
The `layers` argument holds various `SimpleChains` layers, e.g. `TurboDense`, `Conv`,
`Activation`, `Flatten`, `Dropout`, or `MaxPool`. It may optionally terminate in an
`AbstractLoss` layer.
These objects are callable, e.g.
```julia
c = SimpleChain(...);
p = SimpleChains.init_params(c);
c(X, p) # X are the independent variables, and `p` the parameter vector.
```
"""
struct SimpleChain{N,I<:InputDim,L<:Tuple{Vararg{Any,N}}}
inputdim::I
layers::L
@@ -224,6 +247,13 @@ function chain_input_dims(chn::SimpleChain, inputdim::Tuple{Vararg{Integer}})
_try_static(chain_input_dims(chn), inputdim)
end


"""
SimpleChains.init_params!(chn, p, id = nothing)
Randomly initializes the parameter vector `p` with input dim `id`. The input dim does not need to be specified if it was provided to the chain object itself.
See the documentation of the individual layers to see how they are initialized, but it is generally via (Xavier) Glorot uniform or normal distributions.
"""
function init_params!(chn::SimpleChain, x::AbstractVector, id = nothing)
GC.@preserve x init_params!(chn.layers, pointer(x), chain_input_dims(chn, id))
return x
@@ -241,6 +271,12 @@ function init_params(
_id = chain_input_dims(Λ, id)
init_params!(Λ, Vector{T}(undef, numparam(Λ, id)), chain_input_dims(Λ, _id))
end
"""
SimpleChains.init_params(chn[, id = nothing][, ::Type{T} = Float32])
Creates a parameter vector of element type `T` with size determined by `id` (the argument is not required if it was provided to the chain object itself).
See the documentation of the individual layers to see how they are initialized, but it is generally via (Xavier) Glorot uniform or normal distributions.
"""
function init_params(Λ::SimpleChain, ::Type{T}) where {T}
init_params(Λ, nothing, T)
end
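A short sketch of `init_params` with an explicit input dimension, per the docstrings above (not part of this diff); passing `id` as a tuple mirrors `chain_input_dims(chn, inputdim::Tuple{Vararg{Integer}})`, and the exact form is an assumption:

```julia
using SimpleChains

# Input dimension omitted at construction time...
chain = SimpleChain(TurboDense(tanh, 8), TurboDense(identity, 2))

# ...so it is supplied to `init_params` instead (3 inputs, Float32 parameters).
p = SimpleChains.init_params(chain, (static(3),), Float32)
chain(rand(Float32, 3, 10), p)
```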

3 comments on commit 2485545

@chriselrod
Contributor


@ChrisRackauckas new release so users have docstrings?

@ChrisRackauckas
Member Author


@JuliaRegistrator register()

@JuliaRegistrator


Registration pull request created: JuliaRegistries/General/58552

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.2.2 -m "<description of version>" 248554581c7f4e45f56b934dea42b2937abfbd76
git push origin v0.2.2
