Change all uses of wts to weights #570

Open
wants to merge 2 commits into master
14 changes: 0 additions & 14 deletions docs/src/index.md
@@ -242,17 +242,3 @@ The skeleton of a distributed predictor type is in the code
but not yet fully fleshed out. Because Julia by default uses
OpenBLAS, which is already multi-threaded on multicore machines, there
may not be much advantage in using distributed predictor types.

A ```ModResp``` type must provide methods for the ```wtres``` and
```sqrtxwts``` generics. Their values are the arguments to the
```updatebeta``` methods of the ```LinPred``` types. The
```Float64``` value returned by ```updatedelta``` is the value of the
convergence criterion.

Similarly, ```LinPred``` types must provide a method for the
```linpred``` generic. In general ```linpred``` takes an instance of
a ```LinPred``` type and a step factor. Methods that take only an instance
of a ```LinPred``` type use a default step factor of 1. The value of
```linpred``` is the argument to the ```updatemu``` method for
```ModResp``` types. The ```updatemu``` method returns the updated
deviance.
2 changes: 1 addition & 1 deletion src/GLM.jl
@@ -109,7 +109,7 @@ module GLM
If `method=:cholesky` (the default), then the `Cholesky` decomposition method will be used.
If `method=:qr`, then the `QR` decomposition method (which is more stable
but slower) will be used.
- `wts::Vector=similar(y,0)`: Prior frequency (a.k.a. case) weights of observations.
- `weights::Vector=similar(y,0)`: Prior frequency (a.k.a. case) weights of observations.
Such weights are equivalent to repeating each observation a number of times equal
to its weight. Do note that this interpretation gives equal point estimates but
different standard errors from analytical (a.k.a. inverse variance) weights and
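
A minimal usage sketch of the renamed keyword (hypothetical toy data; the `Poisson` family and the row-expansion helper are chosen purely for illustration). Per the docstring above, frequency weights should reproduce the point estimates of a fit on the row-expanded data, while standard errors may differ:

```julia
using GLM

# Hypothetical toy data: four observations with integer frequency weights.
X = [ones(4) [0.0, 0.5, 1.0, 1.5]]
y = [1.0, 0.0, 2.0, 3.0]
w = [2.0, 1.0, 3.0, 1.0]

# Prior frequency weights, passed via the keyword renamed by this PR.
m_w = glm(X, y, Poisson(); weights = w)

# The same fit on the row-expanded data set (row i repeated w[i] times).
idx = reduce(vcat, [fill(i, Int(w[i])) for i in eachindex(w)])
m_e = glm(X[idx, :], y[idx], Poisson())

coef(m_w) ≈ coef(m_e)   # point estimates agree up to solver tolerance
```
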
132 changes: 66 additions & 66 deletions src/glmfit.jl
@@ -17,19 +17,19 @@ struct GlmResp{V<:FPVector,D<:UnivariateDistribution,L<:Link} <: ModResp
mu::V
"`offset:` offset added to `Xβ` to form `eta`. Can be of length 0"
offset::V
"`wts:` prior case weights. Can be of length 0."
wts::V
"`weights:` prior case weights. Can be of length 0."
weights::V
"`wrkwt`: working case weights for the Iteratively Reweighted Least Squares (IRLS) algorithm"
wrkwt::V
"`wrkresid`: working residuals for IRLS"
wrkresid::V
end

function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::V) where {V<:FPVector, D, L}
function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, weights::V) where {V<:FPVector, D, L}
n = length(y)
nη = length(η)
nμ = length(μ)
lw = length(wts)
lw = length(weights)
lo = length(off)

# Check y values
@@ -40,33 +40,33 @@ function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::V) where {V<:FPVec
throw(DimensionMismatch("lengths of η, μ, and y ($nη, $nμ, $n) are not equal"))
end

# Lengths of wts and off can be either n or 0
# Lengths of weights and off can be either n or 0
if lw != 0 && lw != n
throw(DimensionMismatch("wts must have length $n or length 0 but was $lw"))
throw(DimensionMismatch("weights must have length $n or length 0 but was $lw"))
end
if lo != 0 && lo != n
throw(DimensionMismatch("offset must have length $n or length 0 but was $lo"))
end

return GlmResp{V,D,L}(y, d, l, similar(y), η, μ, off, wts, similar(y), similar(y))
return GlmResp{V,D,L}(y, d, l, similar(y), η, μ, off, weights, similar(y), similar(y))
end
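
A small sketch of the length contract enforced above (hypothetical data, using the renamed `weights` keyword): prior weights must either be empty or have one entry per observation, otherwise the constructor's check surfaces as a `DimensionMismatch` from the user-facing `glm` call:

```julia
using GLM

X = [ones(5) randn(5)]
y = rand(5)

glm(X, y, Normal(); weights = ones(5))    # OK: one weight per observation
glm(X, y, Normal(); weights = Float64[])  # OK: empty vector means unweighted
glm(X, y, Normal(); weights = ones(3))    # throws DimensionMismatch (3 ≠ 5)
```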

function GlmResp(y::FPVector, d::Distribution, l::Link, off::FPVector, wts::FPVector)
function GlmResp(y::FPVector, d::Distribution, l::Link, off::FPVector, weights::FPVector)
# Instead of convert(Vector{Float64}, y) to be more ForwardDiff friendly
_y = convert(Vector{float(eltype(y))}, y)
_off = convert(Vector{float(eltype(off))}, off)
_wts = convert(Vector{float(eltype(wts))}, wts)
_weights = convert(Vector{float(eltype(weights))}, weights)
η = similar(_y)
μ = similar(_y)
r = GlmResp(_y, d, l, η, μ, _off, _wts)
initialeta!(r.eta, d, l, _y, _wts, _off)
r = GlmResp(_y, d, l, η, μ, _off, _weights)
initialeta!(r.eta, d, l, _y, _weights, _off)
updateμ!(r, r.eta)
return r
end

function GlmResp(y::AbstractVector{<:Real}, d::D, l::L, off::AbstractVector{<:Real},
wts::AbstractVector{<:Real}) where {D, L}
GlmResp(float(y), d, l, float(off), float(wts))
weights::AbstractVector{<:Real}) where {D, L}
GlmResp(float(y), d, l, float(off), float(weights))
end

deviance(r::GlmResp) = sum(r.devresid)
@@ -97,9 +97,9 @@ function updateμ! end
function updateμ!(r::GlmResp{T}, linPr::T) where T<:FPVector
isempty(r.offset) ? copyto!(r.eta, linPr) : broadcast!(+, r.eta, linPr, r.offset)
updateμ!(r)
if !isempty(r.wts)
map!(*, r.devresid, r.devresid, r.wts)
map!(*, r.wrkwt, r.wrkwt, r.wts)
if !isempty(r.weights)
map!(*, r.devresid, r.devresid, r.weights)
map!(*, r.wrkwt, r.wrkwt, r.weights)
end
r
end
@@ -261,20 +261,20 @@ end
deviance(m::AbstractGLM) = deviance(m.rr)

function nulldeviance(m::GeneralizedLinearModel)
r = m.rr
wts = weights(r.wts)
y = r.y
d = r.d
offset = r.offset
hasint = hasintercept(m)
dev = zero(eltype(y))
r = m.rr
weights = StatsBase.weights(r.weights)
y = r.y
d = r.d
offset = r.offset
hasint = hasintercept(m)
dev = zero(eltype(y))
if isempty(offset) # Faster method
if !isempty(wts)
if !isempty(weights)
mu = hasint ?
mean(y, wts) :
linkinv(r.link, zero(eltype(y))*zero(eltype(wts))/1)
@inbounds for i in eachindex(y, wts)
dev += wts[i] * devresid(d, y[i], mu)
mean(y, weights) :
linkinv(r.link, zero(eltype(y))*zero(eltype(weights))/1)
@inbounds for i in eachindex(y, weights)
dev += weights[i] * devresid(d, y[i], mu)
end
else
mu = hasint ? mean(y) : linkinv(r.link, zero(eltype(y))/1)
@@ -285,7 +285,7 @@ function nulldeviance(m::GeneralizedLinearModel)
else
X = fill(1.0, length(y), hasint ? 1 : 0)
nullm = fit(GeneralizedLinearModel,
X, y, d, r.link; wts=wts, offset=offset,
X, y, d, r.link; weights=weights, offset=offset,
dropcollinear=ispivoted(m.pp),
method=decomposition_method(m.pp),
maxiter=m.maxiter, minstepfac=m.minstepfac,
@@ -296,16 +296,16 @@ end
end

function loglikelihood(m::AbstractGLM)
r = m.rr
wts = r.wts
y = r.y
mu = r.mu
d = r.d
ll = zero(eltype(mu))
if !isempty(wts)
ϕ = deviance(m)/sum(wts)
@inbounds for i in eachindex(y, mu, wts)
ll += loglik_obs(d, y[i], mu[i], wts[i], ϕ)
r = m.rr
weights = r.weights
y = r.y
mu = r.mu
d = r.d
ll = zero(eltype(mu))
if !isempty(weights)
ϕ = deviance(m)/sum(weights)
@inbounds for i in eachindex(y, mu, weights)
ll += loglik_obs(d, y[i], mu[i], weights[i], ϕ)
end
else
ϕ = deviance(m)/length(y)
@@ -317,19 +317,19 @@
end
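
A brief sketch (hypothetical data, renamed `weights` keyword) of the dispersion used in the weighted branch above. With prior weights, ϕ is the deviance divided by the sum of the weights rather than by the number of observations:

```julia
using GLM

X = [ones(6) collect(1.0:6.0)]
y = [1.1, 2.0, 2.9, 4.2, 4.8, 6.1]
w = [1.0, 2.0, 1.0, 2.0, 1.0, 2.0]

m = glm(X, y, Normal(); weights = w)

ϕ = deviance(m) / sum(w)  # dispersion estimate used by `loglikelihood` when weights are present
loglikelihood(m)
```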

function nullloglikelihood(m::GeneralizedLinearModel)
r = m.rr
wts = r.wts
y = r.y
d = r.d
offset = r.offset
hasint = hasintercept(m)
r = m.rr
weights = r.weights
y = r.y
d = r.d
offset = r.offset
hasint = hasintercept(m)
ll = zero(eltype(y))
if isempty(r.offset) # Faster method
if !isempty(wts)
mu = hasint ? mean(y, weights(wts)) : linkinv(r.link, zero(ll)/1)
ϕ = nulldeviance(m)/sum(wts)
@inbounds for i in eachindex(y, wts)
ll += loglik_obs(d, y[i], mu, wts[i], ϕ)
if !isempty(weights)
mu = hasint ? mean(y, StatsBase.weights(weights)) : linkinv(r.link, zero(ll)/1)
ϕ = nulldeviance(m)/sum(weights)
@inbounds for i in eachindex(y, weights)
ll += loglik_obs(d, y[i], mu, weights[i], ϕ)
end
else
mu = hasint ? mean(y) : linkinv(r.link, zero(ll)/1)
@@ -341,7 +341,7 @@ function nullloglikelihood(m::GeneralizedLinearModel)
else
X = fill(1.0, length(y), hasint ? 1 : 0)
nullm = fit(GeneralizedLinearModel,
X, y, d, r.link; wts=wts, offset=offset,
X, y, d, r.link; weights=weights, offset=offset,
dropcollinear=ispivoted(m.pp),
method=decomposition_method(m.pp),
maxiter=m.maxiter, minstepfac=m.minstepfac,
@@ -466,7 +466,7 @@ end

function StatsBase.fit!(m::AbstractGLM,
y;
wts=nothing,
weights=nothing,
offset=nothing,
verbose::Bool=false,
maxiter::Integer=30,
@@ -498,9 +498,9 @@ function StatsBase.fit!(m::AbstractGLM,
r = m.rr
V = typeof(r.y)
r.y = copy!(r.y, y)
isa(wts, Nothing) || copy!(r.wts, wts)
isa(weights, Nothing) || copy!(r.weights, weights)
isa(offset, Nothing) || copy!(r.offset, offset)
initialeta!(r.eta, r.d, r.l, r.y, r.wts, r.offset)
initialeta!(r.eta, r.d, r.l, r.y, r.weights, r.offset)
updateμ!(r, r.eta)
fill!(m.pp.beta0, 0)
m.fit = false
@@ -560,8 +560,8 @@ function fit(::Type{M},
dropcollinear::Bool = true,
method::Symbol = :cholesky,
dofit::Union{Bool, Nothing} = nothing,
wts::AbstractVector{<:Real} = similar(y, 0),
offset::AbstractVector{<:Real} = similar(y, 0),
weights::AbstractVector{<:Real} = similar(y, 0),
offset::AbstractVector{<:Real} = similar(y, 0),
fitargs...) where {M<:AbstractGLM}
if dofit === nothing
dofit = true
@@ -574,7 +574,7 @@ function fit(::Type{M},
throw(DimensionMismatch("number of rows in X and y must match"))
end

rr = GlmResp(y, d, l, offset, wts)
rr = GlmResp(y, d, l, offset, weights)

if method === :cholesky
res = M(rr, cholpred(X, dropcollinear), nothing, false)
@@ -600,7 +600,7 @@ function fit(::Type{M},
d::UnivariateDistribution,
l::Link=canonicallink(d);
offset::Union{AbstractVector, Nothing} = nothing,
wts::Union{AbstractVector, Nothing} = nothing,
weights::Union{AbstractVector, Nothing} = nothing,
dropcollinear::Bool = true,
method::Symbol = :cholesky,
dofit::Union{Bool, Nothing} = nothing,
@@ -620,9 +620,9 @@ function fit(::Type{M},
end

off = offset === nothing ? similar(y, 0) : offset
wts = wts === nothing ? similar(y, 0) : wts
rr = GlmResp(y, d, l, off, wts)
weights = weights === nothing ? similar(y, 0) : weights
rr = GlmResp(y, d, l, off, weights)

if method === :cholesky
res = M(rr, cholpred(X, dropcollinear), f, false)
elseif method === :qr
@@ -791,17 +791,17 @@ function initialeta!(eta::AbstractVector,
dist::UnivariateDistribution,
link::Link,
y::AbstractVector,
wts::AbstractVector,
weights::AbstractVector,
off::AbstractVector)


n = length(y)
lw = length(wts)
lw = length(weights)
lo = length(off)

if lw == n
@inbounds @simd for i = eachindex(y, eta, wts)
μ = mustart(dist, y[i], wts[i])
@inbounds @simd for i = eachindex(y, eta, weights)
μ = mustart(dist, y[i], weights[i])
eta[i] = linkfun(link, μ)
end
elseif lw == 0
@@ -810,7 +810,7 @@ function initialeta!(eta::AbstractVector,
eta[i] = linkfun(link, μ)
end
else
throw(ArgumentError("length of wts must be either $n or 0 but was $lw"))
throw(ArgumentError("length of weights must be either $n or 0 but was $lw"))
end

if lo == n
6 changes: 3 additions & 3 deletions src/linpred.jl
@@ -375,10 +375,10 @@ For linear and generalized linear models, returns the number of rows, or,
when prior weights are specified, the sum of weights.
"""
function nobs(obj::LinPredModel)
if isempty(obj.rr.wts)
oftype(sum(one(eltype(obj.rr.wts))), length(obj.rr.y))
if isempty(obj.rr.weights)
oftype(sum(one(eltype(obj.rr.weights))), length(obj.rr.y))
else
sum(obj.rr.wts)
sum(obj.rr.weights)
end
end
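
A short illustration (hypothetical data, renamed `weights` keyword) of the behaviour documented above: without prior weights `nobs` returns the number of rows, and with prior weights it returns their sum:

```julia
using GLM

X = [ones(4) [0.0, 1.0, 2.0, 3.0]]
y = [0.9, 2.1, 2.9, 4.2]

nobs(glm(X, y, Normal()))                                   # 4.0 (number of rows)
nobs(glm(X, y, Normal(); weights = [1.0, 2.0, 2.0, 5.0]))   # 10.0 (sum of the weights)
```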
