diff --git a/docs/src/index.md b/docs/src/index.md
index 65fd104f..4d2576ea 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -242,17 +242,3 @@ The skeleton of a distributed predictor type is in the code
 but not yet fully fleshed out.  Because Julia by default uses
 OpenBLAS, which is already multi-threaded on multicore machines, there
 may not be much advantage in using distributed predictor types.
-
-A ```ModResp``` type must provide methods for the ```wtres``` and
-```sqrtxwts``` generics.  Their values are the arguments to the
-```updatebeta``` methods of the ```LinPred``` types.  The
-```Float64``` value returned by ```updatedelta``` is the value of the
-convergence criterion.
-
-Similarly, ```LinPred``` types must provide a method for the
-```linpred``` generic.  In general ```linpred``` takes an instance of
-a ```LinPred``` type and a step factor.  Methods that take only an instance
-of a ```LinPred``` type use a default step factor of 1.  The value of
-```linpred``` is the argument to the ```updatemu``` method for
-```ModResp``` types.  The ```updatemu``` method returns the updated
-deviance.
diff --git a/src/GLM.jl b/src/GLM.jl
index 59c327db..390f1976 100644
--- a/src/GLM.jl
+++ b/src/GLM.jl
@@ -109,7 +109,7 @@ module GLM
           If `method=:cholesky` (the default), then the `Cholesky` decomposition method will be used.
           If `method=:qr`, then the `QR` decomposition method (which is more stable
           but slower) will be used.
-        - `wts::Vector=similar(y,0)`: Prior frequency (a.k.a. case) weights of observations.
+        - `weights::Vector=similar(y,0)`: Prior frequency (a.k.a. case) weights of observations.
           Such weights are equivalent to repeating each observation a number of times equal
           to its weight. Do note that this interpretation gives equal point estimates but
           different standard errors from analytical (a.k.a. inverse variance) weights and
diff --git a/src/glmfit.jl b/src/glmfit.jl
index 5406fcbb..03e19f9c 100644
--- a/src/glmfit.jl
+++ b/src/glmfit.jl
@@ -17,19 +17,19 @@ struct GlmResp{V<:FPVector,D<:UnivariateDistribution,L<:Link} <: ModResp
     mu::V
     "`offset:` offset added to `Xβ` to form `eta`.  Can be of length 0"
     offset::V
-    "`wts:` prior case weights.  Can be of length 0."
-    wts::V
+    "`weights:` prior case weights.  Can be of length 0."
+    weights::V
     "`wrkwt`: working case weights for the Iteratively Reweighted Least Squares (IRLS) algorithm"
     wrkwt::V
     "`wrkresid`: working residuals for IRLS"
     wrkresid::V
 end
 
-function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::V) where {V<:FPVector, D, L}
+function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, weights::V) where {V<:FPVector, D, L}
     n  = length(y)
     nη = length(η)
     nμ = length(μ)
-    lw = length(wts)
+    lw = length(weights)
     lo = length(off)
 
     # Check y values
@@ -40,33 +40,33 @@ function GlmResp(y::V, d::D, l::L, η::V, μ::V, off::V, wts::V) where {V<:FPVec
         throw(DimensionMismatch("lengths of η, μ, and y ($nη, $nμ, $n) are not equal"))
     end
 
-    # Lengths of wts and off can be either n or 0
+    # Lengths of weights and off can be either n or 0
     if lw != 0 && lw != n
-        throw(DimensionMismatch("wts must have length $n or length 0 but was $lw"))
+        throw(DimensionMismatch("weights must have length $n or length 0 but was $lw"))
     end
     if lo != 0 && lo != n
         throw(DimensionMismatch("offset must have length $n or length 0 but was $lo"))
     end
 
-    return GlmResp{V,D,L}(y, d, l, similar(y), η, μ, off, wts, similar(y), similar(y))
+    return GlmResp{V,D,L}(y, d, l, similar(y), η, μ, off, weights, similar(y), similar(y))
 end
 
-function GlmResp(y::FPVector, d::Distribution, l::Link, off::FPVector, wts::FPVector)
+function GlmResp(y::FPVector, d::Distribution, l::Link, off::FPVector, weights::FPVector)
     # Instead of convert(Vector{Float64}, y) to be more ForwardDiff friendly
     _y   = convert(Vector{float(eltype(y))}, y)
     _off = convert(Vector{float(eltype(off))}, off)
-    _wts = convert(Vector{float(eltype(wts))}, wts)
+    _weights = convert(Vector{float(eltype(weights))}, weights)
     η    = similar(_y)
     μ    = similar(_y)
-    r    = GlmResp(_y, d, l, η, μ, _off, _wts)
-    initialeta!(r.eta, d, l, _y, _wts, _off)
+    r    = GlmResp(_y, d, l, η, μ, _off, _weights)
+    initialeta!(r.eta, d, l, _y, _weights, _off)
     updateμ!(r, r.eta)
     return r
 end
 
 function GlmResp(y::AbstractVector{<:Real}, d::D, l::L, off::AbstractVector{<:Real},
-                 wts::AbstractVector{<:Real}) where {D, L}
-        GlmResp(float(y), d, l, float(off), float(wts))
+                 weights::AbstractVector{<:Real}) where {D, L}
+        GlmResp(float(y), d, l, float(off), float(weights))
 end
 
 deviance(r::GlmResp) = sum(r.devresid)
@@ -97,9 +97,9 @@ function updateμ! end
 function updateμ!(r::GlmResp{T}, linPr::T) where T<:FPVector
     isempty(r.offset) ? copyto!(r.eta, linPr) : broadcast!(+, r.eta, linPr, r.offset)
     updateμ!(r)
-    if !isempty(r.wts)
-        map!(*, r.devresid, r.devresid, r.wts)
-        map!(*, r.wrkwt, r.wrkwt, r.wts)
+    if !isempty(r.weights)
+        map!(*, r.devresid, r.devresid, r.weights)
+        map!(*, r.wrkwt, r.wrkwt, r.weights)
     end
     r
 end
@@ -261,20 +261,20 @@ end
 deviance(m::AbstractGLM) = deviance(m.rr)
 
 function nulldeviance(m::GeneralizedLinearModel)
-    r      = m.rr
-    wts    = weights(r.wts)
-    y      = r.y
-    d      = r.d
-    offset = r.offset
-    hasint = hasintercept(m)
-    dev    = zero(eltype(y))
+    r       = m.rr
+    weights = weights(r.weights)
+    y       = r.y
+    d       = r.d
+    offset  = r.offset
+    hasint  = hasintercept(m)
+    dev     = zero(eltype(y))
     if isempty(offset) # Faster method
-        if !isempty(wts)
+        if !isempty(weights)
             mu = hasint ?
-                mean(y, wts) :
-                linkinv(r.link, zero(eltype(y))*zero(eltype(wts))/1)
-            @inbounds for i in eachindex(y, wts)
-                dev += wts[i] * devresid(d, y[i], mu)
+                mean(y, weights) :
+                linkinv(r.link, zero(eltype(y))*zero(eltype(weights))/1)
+            @inbounds for i in eachindex(y, weights)
+                dev += weights[i] * devresid(d, y[i], mu)
             end
         else
             mu = hasint ? mean(y) : linkinv(r.link, zero(eltype(y))/1)
@@ -285,7 +285,7 @@ function nulldeviance(m::GeneralizedLinearModel)
     else
         X = fill(1.0, length(y), hasint ? 1 : 0)
         nullm = fit(GeneralizedLinearModel,
-                    X, y, d, r.link; wts=wts, offset=offset,
+                    X, y, d, r.link; weights=weights, offset=offset,
                     dropcollinear=ispivoted(m.pp),
                     method=decomposition_method(m.pp),
                     maxiter=m.maxiter, minstepfac=m.minstepfac,
@@ -296,16 +296,16 @@ function nulldeviance(m::GeneralizedLinearModel)
 end
 
 function loglikelihood(m::AbstractGLM)
-    r   = m.rr
-    wts = r.wts
-    y   = r.y
-    mu  = r.mu
-    d   = r.d
-    ll  = zero(eltype(mu))
-    if !isempty(wts)
-        ϕ = deviance(m)/sum(wts)
-        @inbounds for i in eachindex(y, mu, wts)
-            ll += loglik_obs(d, y[i], mu[i], wts[i], ϕ)
+    r       = m.rr
+    weights = r.weights
+    y       = r.y
+    mu      = r.mu
+    d       = r.d
+    ll      = zero(eltype(mu))
+    if !isempty(weights)
+        ϕ = deviance(m)/sum(weights)
+        @inbounds for i in eachindex(y, mu, weights)
+            ll += loglik_obs(d, y[i], mu[i], weights[i], ϕ)
         end
     else
         ϕ = deviance(m)/length(y)
@@ -317,19 +317,19 @@ function loglikelihood(m::AbstractGLM)
 end
 
 function nullloglikelihood(m::GeneralizedLinearModel)
-    r      = m.rr
-    wts    = r.wts
-    y      = r.y
-    d      = r.d
-    offset = r.offset
-    hasint = hasintercept(m)
+    r       = m.rr
+    weights = r.weights
+    y       = r.y
+    d       = r.d
+    offset  = r.offset
+    hasint  = hasintercept(m)
     ll  = zero(eltype(y))
     if isempty(r.offset) # Faster method
-        if !isempty(wts)
-            mu = hasint ? mean(y, weights(wts)) : linkinv(r.link, zero(ll)/1)
-            ϕ = nulldeviance(m)/sum(wts)
-            @inbounds for i in eachindex(y, wts)
-                ll += loglik_obs(d, y[i], mu, wts[i], ϕ)
+        if !isempty(weights)
+            mu = hasint ? mean(y, weights(weights)) : linkinv(r.link, zero(ll)/1)
+            ϕ = nulldeviance(m)/sum(weights)
+            @inbounds for i in eachindex(y, weights)
+                ll += loglik_obs(d, y[i], mu, weights[i], ϕ)
             end
         else
             mu = hasint ? mean(y) : linkinv(r.link, zero(ll)/1)
@@ -341,7 +341,7 @@ function nullloglikelihood(m::GeneralizedLinearModel)
     else
         X = fill(1.0, length(y), hasint ? 1 : 0)
         nullm = fit(GeneralizedLinearModel,
-                    X, y, d, r.link; wts=wts, offset=offset,
+                    X, y, d, r.link; weights=weights, offset=offset,
                     dropcollinear=ispivoted(m.pp),
                     method=decomposition_method(m.pp),
                     maxiter=m.maxiter, minstepfac=m.minstepfac,
@@ -466,7 +466,7 @@ end
 
 function StatsBase.fit!(m::AbstractGLM,
                         y;
-                        wts=nothing,
+                        weights=nothing,
                         offset=nothing,
                         verbose::Bool=false,
                         maxiter::Integer=30,
@@ -498,9 +498,9 @@ function StatsBase.fit!(m::AbstractGLM,
     r = m.rr
     V = typeof(r.y)
     r.y = copy!(r.y, y)
-    isa(wts, Nothing) || copy!(r.wts, wts)
+    isa(weights, Nothing) || copy!(r.weights, weights)
     isa(offset, Nothing) || copy!(r.offset, offset)
-    initialeta!(r.eta, r.d, r.l, r.y, r.wts, r.offset)
+    initialeta!(r.eta, r.d, r.l, r.y, r.weights, r.offset)
     updateμ!(r, r.eta)
     fill!(m.pp.beta0, 0)
     m.fit = false
@@ -560,8 +560,8 @@ function fit(::Type{M},
     dropcollinear::Bool = true,
     method::Symbol = :cholesky,
     dofit::Union{Bool, Nothing} = nothing,
-    wts::AbstractVector{<:Real}      = similar(y, 0),
-    offset::AbstractVector{<:Real}   = similar(y, 0),
+    weights::AbstractVector{<:Real} = similar(y, 0),
+    offset::AbstractVector{<:Real}  = similar(y, 0),
     fitargs...) where {M<:AbstractGLM}
     if dofit === nothing
         dofit = true
@@ -574,7 +574,7 @@ function fit(::Type{M},
         throw(DimensionMismatch("number of rows in X and y must match"))
     end
 
-    rr = GlmResp(y, d, l, offset, wts)
+    rr = GlmResp(y, d, l, offset, weights)
 
     if method === :cholesky
         res = M(rr, cholpred(X, dropcollinear), nothing, false)
@@ -600,7 +600,7 @@ function fit(::Type{M},
              d::UnivariateDistribution,
              l::Link=canonicallink(d);
              offset::Union{AbstractVector, Nothing} = nothing,
-             wts::Union{AbstractVector, Nothing} = nothing,
+             weights::Union{AbstractVector, Nothing} = nothing,
              dropcollinear::Bool = true,
              method::Symbol = :cholesky,
              dofit::Union{Bool, Nothing} = nothing,
@@ -620,9 +620,9 @@ function fit(::Type{M},
     end
 
     off = offset === nothing ? similar(y, 0) : offset
-    wts = wts === nothing ? similar(y, 0) : wts
-    rr = GlmResp(y, d, l, off, wts)
-    
+    weights = weights === nothing ? similar(y, 0) : weights
+    rr = GlmResp(y, d, l, off, weights)
+
     if method === :cholesky
         res = M(rr, cholpred(X, dropcollinear), f, false)
     elseif method === :qr
@@ -791,17 +791,17 @@ function initialeta!(eta::AbstractVector,
                     dist::UnivariateDistribution,
                     link::Link,
                     y::AbstractVector,
-                    wts::AbstractVector,
+                    weights::AbstractVector,
                     off::AbstractVector)
 
 
     n  = length(y)
-    lw = length(wts)
+    lw = length(weights)
     lo = length(off)
 
     if lw == n
-        @inbounds @simd for i = eachindex(y, eta, wts)
-            μ      = mustart(dist, y[i], wts[i])
+        @inbounds @simd for i = eachindex(y, eta, weights)
+            μ      = mustart(dist, y[i], weights[i])
             eta[i] = linkfun(link, μ)
         end
     elseif lw == 0
@@ -810,7 +810,7 @@ function initialeta!(eta::AbstractVector,
             eta[i] = linkfun(link, μ)
         end
     else
-        throw(ArgumentError("length of wts must be either $n or 0 but was $lw"))
+        throw(ArgumentError("length of weights must be either $n or 0 but was $lw"))
     end
 
     if lo == n
diff --git a/src/linpred.jl b/src/linpred.jl
index 9973d5d2..56724bc9 100644
--- a/src/linpred.jl
+++ b/src/linpred.jl
@@ -375,10 +375,10 @@ For linear and generalized linear models, returns the number of rows, or,
 when prior weights are specified, the sum of weights.
 """
 function nobs(obj::LinPredModel)
-    if isempty(obj.rr.wts)
-        oftype(sum(one(eltype(obj.rr.wts))), length(obj.rr.y))
+    if isempty(obj.rr.weights)
+        oftype(sum(one(eltype(obj.rr.weights))), length(obj.rr.y))
     else
-        sum(obj.rr.wts)
+        sum(obj.rr.weights)
     end
 end
 
diff --git a/src/lm.jl b/src/lm.jl
index f337862c..df13cb37 100644
--- a/src/lm.jl
+++ b/src/lm.jl
@@ -7,36 +7,36 @@ Encapsulates the response for a linear model
 
 - `mu`: current value of the mean response vector or fitted value
 - `offset`: optional offset added to the linear predictor to form `mu`
-- `wts`: optional vector of prior frequency (a.k.a. case) weights for observations
+- `weights`: optional vector of prior frequency (a.k.a. case) weights for observations
 - `y`: observed response vector
 
-Either or both `offset` and `wts` may be of length 0
+Either or both `offset` and `weights` may be of length 0
 """
 mutable struct LmResp{V<:FPVector} <: ModResp  # response in a linear model
     mu::V                                  # mean response
     offset::V                              # offset added to linear predictor (may have length 0)
-    wts::V                                 # prior weights (may have length 0)
+    weights::V                             # prior weights (may have length 0)
     y::V                                   # response
-    function LmResp{V}(mu::V, off::V, wts::V, y::V) where V
+    function LmResp{V}(mu::V, off::V, weights::V, y::V) where V
         n = length(y)
         length(mu) == n || error("mismatched lengths of mu and y")
         ll = length(off)
         ll == 0 || ll == n || error("length of offset is $ll, must be $n or 0")
-        ll = length(wts)
-        ll == 0 || ll == n || error("length of wts is $ll, must be $n or 0")
-        new{V}(mu, off, wts, y)
+        ll = length(weights)
+        ll == 0 || ll == n || error("length of weights is $ll, must be $n or 0")
+        new{V}(mu, off, weights, y)
     end
 end
 
-function LmResp(y::AbstractVector{<:Real}, wts::Union{Nothing,AbstractVector{<:Real}}=nothing)
+function LmResp(y::AbstractVector{<:Real}, weights::Union{Nothing,AbstractVector{<:Real}}=nothing)
     # Instead of convert(Vector{Float64}, y) to be more ForwardDiff friendly
     _y = convert(Vector{float(eltype(y))}, y)
-    _wts = if wts === nothing
+    _weights = if weights === nothing
         similar(_y, 0)
     else
-        convert(Vector{float(eltype(wts))}, wts)
+        convert(Vector{float(eltype(weights))}, weights)
     end
-    return LmResp{typeof(_y)}(zero(_y), zero(_y), _wts, _y)
+    return LmResp{typeof(_y)}(zero(_y), zero(_y), _weights, _y)
 end
 
 function updateμ!(r::LmResp{V}, linPr::V) where V<:FPVector
@@ -51,22 +51,22 @@ updateμ!(r::LmResp{V}, linPr) where {V<:FPVector} = updateμ!(r, convert(V, vec
 function deviance(r::LmResp)
     y = r.y
     mu = r.mu
-    wts = r.wts
-    v = zero(eltype(y)) + zero(eltype(y)) * zero(eltype(wts))
-    if isempty(wts)
+    weights = r.weights
+    v = zero(eltype(y)) + zero(eltype(y)) * zero(eltype(weights))
+    if isempty(weights)
         @inbounds @simd for i = eachindex(y,mu)
             v += abs2(y[i] - mu[i])
         end
     else
-        @inbounds @simd for i = eachindex(y,mu,wts)
-            v += abs2(y[i] - mu[i])*wts[i]
+        @inbounds @simd for i = eachindex(y,mu,weights)
+            v += abs2(y[i] - mu[i])*weights[i]
         end
     end
     v
 end
 
 function loglikelihood(r::LmResp)
-    n = isempty(r.wts) ? length(r.y) : sum(r.wts)
+    n = isempty(r.weights) ? length(r.y) : sum(r.weights)
     -n/2 * (log(2π * deviance(r)/n) + 1)
 end
 
@@ -93,10 +93,10 @@ end
 LinearAlgebra.cholesky(x::LinearModel) = cholesky(x.pp)
 
 function StatsBase.fit!(obj::LinearModel)
-    if isempty(obj.rr.wts)
+    if isempty(obj.rr.weights)
         delbeta!(obj.pp, obj.rr.y)
     else
-        delbeta!(obj.pp, obj.rr.y, obj.rr.wts)
+        delbeta!(obj.pp, obj.rr.y, obj.rr.weights)
     end
     obj.pp.beta0 .= obj.pp.delbeta
     updateμ!(obj.rr, linpred(obj.pp, zero(eltype(obj.rr.y))))
@@ -117,10 +117,10 @@ const FIT_LM_DOC = """
 
 """
     fit(LinearModel, formula::FormulaTerm, data;
-        [wts::AbstractVector], dropcollinear::Bool=true, method::Symbol=:cholesky,
+        [weights::AbstractVector], dropcollinear::Bool=true, method::Symbol=:cholesky,
         contrasts::AbstractDict{Symbol}=Dict{Symbol,Any}())
     fit(LinearModel, X::AbstractMatrix, y::AbstractVector;
-        wts::AbstractVector=similar(y, 0), dropcollinear::Bool=true, method::Symbol=:cholesky)
+        weights::AbstractVector=similar(y, 0), dropcollinear::Bool=true, method::Symbol=:cholesky)
 
 Fit a linear model to data.
 
@@ -128,7 +128,7 @@ $FIT_LM_DOC
 """
 function fit(::Type{LinearModel}, X::AbstractMatrix{<:Real}, y::AbstractVector{<:Real},
              allowrankdeficient_dep::Union{Bool,Nothing}=nothing;
-             wts::AbstractVector{<:Real}=similar(y, 0),
+             weights::AbstractVector{<:Real}=similar(y, 0),
              dropcollinear::Bool=true,
              method::Symbol=:cholesky)
     if allowrankdeficient_dep !== nothing
@@ -136,11 +136,11 @@ function fit(::Type{LinearModel}, X::AbstractMatrix{<:Real}, y::AbstractVector{<
               "argument `dropcollinear` instead. Proceeding with positional argument value: $allowrankdeficient_dep"
         dropcollinear = allowrankdeficient_dep
     end
-    
+
     if method === :cholesky
-        fit!(LinearModel(LmResp(y, wts), cholpred(X, dropcollinear), nothing))
+        fit!(LinearModel(LmResp(y, weights), cholpred(X, dropcollinear), nothing))
     elseif method === :qr
-        fit!(LinearModel(LmResp(y, wts), qrpred(X, dropcollinear), nothing))
+        fit!(LinearModel(LmResp(y, weights), qrpred(X, dropcollinear), nothing))
     else
         throw(ArgumentError("The only supported values for keyword argument `method` are `:cholesky` and `:qr`."))
     end
@@ -148,17 +148,17 @@ end
 
 function fit(::Type{LinearModel}, f::FormulaTerm, data,
              allowrankdeficient_dep::Union{Bool,Nothing}=nothing;
-             wts::Union{AbstractVector{<:Real}, Nothing}=nothing,
+             weights::Union{AbstractVector{<:Real}, Nothing}=nothing,
              dropcollinear::Bool=true,
              method::Symbol=:cholesky,
              contrasts::AbstractDict{Symbol}=Dict{Symbol,Any}())
     f, (y, X) = modelframe(f, data, contrasts, LinearModel)
-    wts === nothing && (wts = similar(y, 0))
+    weights === nothing && (weights = similar(y, 0))
 
     if method === :cholesky
-        fit!(LinearModel(LmResp(y, wts), cholpred(X, dropcollinear), f))
+        fit!(LinearModel(LmResp(y, weights), cholpred(X, dropcollinear), f))
     elseif method === :qr
-        fit!(LinearModel(LmResp(y, wts), qrpred(X, dropcollinear), f))
+        fit!(LinearModel(LmResp(y, weights), qrpred(X, dropcollinear), f))
     else
         throw(ArgumentError("The only supported values for keyword argument `method` are `:cholesky` and `:qr`."))
     end
@@ -166,13 +166,13 @@ end
 
 """
     lm(formula, data;
-       [wts::AbstractVector], dropcollinear::Bool=true, method::Symbol=:cholesky,
+       [weights::AbstractVector], dropcollinear::Bool=true, method::Symbol=:cholesky,
        contrasts::AbstractDict{Symbol}=Dict{Symbol,Any}())
     lm(X::AbstractMatrix, y::AbstractVector;
-       wts::AbstractVector=similar(y, 0), dropcollinear::Bool=true, method::Symbol=:cholesky)
+       weights::AbstractVector=similar(y, 0), dropcollinear::Bool=true, method::Symbol=:cholesky)
 
 Fit a linear model to data.
-An alias for `fit(LinearModel, X, y; wts=wts, dropcollinear=dropcollinear, method=method)`
+An alias for `fit(LinearModel, X, y; weights=weights, dropcollinear=dropcollinear, method=method)`
 
 $FIT_LM_DOC
 """
@@ -195,13 +195,13 @@ For linear models, the deviance of the null model is equal to the total sum of s
 """
 function nulldeviance(obj::LinearModel)
     y = obj.rr.y
-    wts = obj.rr.wts
+    weights = obj.rr.weights
 
     if hasintercept(obj)
-        if isempty(wts)
+        if isempty(weights)
             m = mean(y)
         else
-            m = mean(y, weights(wts))
+            m = mean(y, weights(weights))
         end
     else
         @warn("Starting from GLM.jl 1.8, null model is defined as having no predictor at all " *
@@ -209,14 +209,14 @@ function nulldeviance(obj::LinearModel)
         m = zero(eltype(y))
     end
 
-    v = zero(eltype(y))*zero(eltype(wts))
-    if isempty(wts)
+    v = zero(eltype(y))*zero(eltype(weights))
+    if isempty(weights)
         @inbounds @simd for yi in y
             v += abs2(yi - m)
         end
     else
-        @inbounds @simd for i = eachindex(y,wts)
-            v += abs2(y[i] - m)*wts[i]
+        @inbounds @simd for i = eachindex(y,weights)
+            v += abs2(y[i] - m)*weights[i]
         end
     end
     v
@@ -226,7 +226,7 @@ loglikelihood(obj::LinearModel) = loglikelihood(obj.rr)
 
 function nullloglikelihood(obj::LinearModel)
     r = obj.rr
-    n = isempty(r.wts) ? length(r.y) : sum(r.wts)
+    n = isempty(r.weights) ? length(r.y) : sum(r.weights)
     -n/2 * (log(2π * nulldeviance(obj)/n) + 1)
 end
 
@@ -316,7 +316,7 @@ function StatsModels.predict!(res::Union{AbstractVector,
         prediction, lower, upper = res
         length(prediction) == length(lower) == length(upper) == size(newx, 1) ||
             throw(DimensionMismatch("length of vectors in `res` must equal the number of rows in `newx`"))
-        length(mm.rr.wts) == 0 || error("prediction with confidence intervals not yet implemented for weighted regression")
+        length(mm.rr.weights) == 0 || error("prediction with confidence intervals not yet implemented for weighted regression")
 
         dev = deviance(mm)
         dofr = dof_residual(mm)
@@ -355,8 +355,8 @@ function StatsBase.cooksdistance(obj::LinearModel)
     X = modelmatrix(obj)
     XtX = crossmodelmatrix(obj)
     k == size(X,2) || throw(ArgumentError("Models with collinear terms are not currently supported."))
-    wts = obj.rr.wts
-    if isempty(wts)
+    weights = obj.rr.weights
+    if isempty(weights)
         hii = diag(X * inv(XtX) * X')
     else
         throw(ArgumentError("Weighted models are not currently supported."))
diff --git a/src/negbinfit.jl b/src/negbinfit.jl
index c73dd655..fa3d4046 100644
--- a/src/negbinfit.jl
+++ b/src/negbinfit.jl
@@ -1,23 +1,23 @@
-function mle_for_θ(y::AbstractVector, μ::AbstractVector, wts::AbstractVector;
+function mle_for_θ(y::AbstractVector, μ::AbstractVector, weights::AbstractVector;
                    maxiter=30, tol=1.e-6)
     function first_derivative(θ::Real)
         tmp(yi, μi) = (yi+θ)/(μi+θ) + log(μi+θ) - 1 - log(θ) - digamma(θ+yi) + digamma(θ)
         unit_weights ? sum(tmp(yi, μi) for (yi, μi) in zip(y, μ)) :
-                       sum(wti * tmp(yi, μi) for (wti, yi, μi) in zip(wts, y, μ))
+                       sum(wti * tmp(yi, μi) for (wti, yi, μi) in zip(weights, y, μ))
     end
     function second_derivative(θ::Real)
         tmp(yi, μi) = -(yi+θ)/(μi+θ)^2 + 2/(μi+θ) - 1/θ - trigamma(θ+yi) + trigamma(θ)
         unit_weights ? sum(tmp(yi, μi) for (yi, μi) in zip(y, μ)) :
-                       sum(wti * tmp(yi, μi) for (wti, yi, μi) in zip(wts, y, μ))
+                       sum(wti * tmp(yi, μi) for (wti, yi, μi) in zip(weights, y, μ))
     end
 
-    unit_weights = length(wts) == 0
+    unit_weights = length(weights) == 0
     if unit_weights
         n = length(y)
         θ = n / sum((yi/μi - 1)^2 for (yi, μi) in zip(y, μ))
     else
-        n = sum(wts)
-        θ = n / sum(wti * (yi/μi - 1)^2 for (wti, yi, μi) in zip(wts, y, μ))
+        n = sum(weights)
+        θ = n / sum(wti * (yi/μi - 1)^2 for (wti, yi, μi) in zip(weights, y, μ))
     end
     δ, converged = one(θ), false
 
@@ -70,7 +70,7 @@ In both cases, `link` may specify the link function
 function negbin(F,
                 D,
                 args...;
-                wts::Union{Nothing, AbstractVector}=nothing,
+                weights::Union{Nothing, AbstractVector}=nothing,
                 initialθ::Real=Inf,
                 dropcollinear::Bool=true,
                 method::Symbol=:cholesky,
@@ -108,23 +108,23 @@ function negbin(F,
     # fit a Poisson regression model if the user does not specify an initial θ
     if isinf(initialθ)
         regmodel = glm(F, D, Poisson(), args...;
-                       wts=wts, dropcollinear=dropcollinear, method=method, maxiter=maxiter,
+                       weights=weights, dropcollinear=dropcollinear, method=method, maxiter=maxiter,
                        atol=atol, rtol=rtol, verbose=verbose, kwargs...)
     else
         regmodel = glm(F, D, NegativeBinomial(initialθ), args...;
-                       wts=wts, dropcollinear=dropcollinear, method=method, maxiter=maxiter,
+                       weights=weights, dropcollinear=dropcollinear, method=method, maxiter=maxiter,
                        atol=atol, rtol=rtol, verbose=verbose, kwargs...)
     end
 
     μ = regmodel.rr.mu
     y = regmodel.rr.y
-    wts = regmodel.rr.wts
-    lw, ly = length(wts), length(y)
+    weights = regmodel.rr.weights
+    lw, ly = length(weights), length(y)
     if lw != ly && lw != 0
-        throw(ArgumentError("length of wts must be either $ly or 0 but was $lw"))
+        throw(ArgumentError("length of weights must be either $ly or 0 but was $lw"))
     end
 
-    θ = mle_for_θ(y, μ, wts; maxiter=maxiter, tol=rtol)
+    θ = mle_for_θ(y, μ, weights; maxiter=maxiter, tol=rtol)
     d = sqrt(2 * max(1, deviance(regmodel)))
     δ = one(θ)
     ll = loglikelihood(regmodel)
@@ -142,7 +142,7 @@ function negbin(F,
                        atol=atol, rtol=rtol, verbose=verbose, kwargs...)
         μ = regmodel.rr.mu
         prevθ = θ
-        θ = mle_for_θ(y, μ, wts; maxiter=maxiter, tol=rtol)
+        θ = mle_for_θ(y, μ, weights; maxiter=maxiter, tol=rtol)
         δ = prevθ - θ
         ll0 = ll
         ll = loglikelihood(regmodel)
diff --git a/test/runtests.jl b/test/runtests.jl
index 235c3745..4d3b046f 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -28,7 +28,7 @@ linreg(x::AbstractVecOrMat, y::AbstractVector) = qr!(simplemm(x)) \ y
     lm1 = fit(LinearModel, @formula(OptDen ~ Carb), form; method=dmethod)
     test_show(lm1)
     @test isapprox(coef(lm1), linreg(form.Carb, form.OptDen))
-        
+
     @test isapprox(vcov(lm1), Σ)
     @test isapprox(cor(lm1), Diagonal(diag(Σ))^(-1/2)*Σ*Diagonal(diag(Σ))^(-1/2))
     @test dof(lm1) == 3
@@ -72,10 +72,10 @@ linreg(x::AbstractVecOrMat, y::AbstractVector) = qr!(simplemm(x)) \ y
 end
 
 @testset "Cook's Distance in Linear Model with $dmethod" for dmethod in (:cholesky, :qr)
-    st_df = DataFrame( 
+    st_df = DataFrame(
         Y=[6.4, 7.4, 10.4, 15.1, 12.3 , 11.4],
         XA=[1.5, 6.5, 11.5, 19.9, 17.0, 15.5],
-        XB=[1.8, 7.8, 11.8, 20.5, 17.3, 15.8], 
+        XB=[1.8, 7.8, 11.8, 20.5, 17.3, 15.8],
         XC=[3., 13., 23., 39.8, 34., 31.],
         # values from SAS proc reg
         CooksD_base=[1.4068501943, 0.176809102, 0.0026655177, 1.0704009915, 0.0875726457, 0.1331183932], 
@@ -87,7 +87,7 @@ end
     t_lm_base = lm(@formula(Y ~ XA), st_df; method=dmethod)
     @test isapprox(st_df.CooksD_base, cooksdistance(t_lm_base))
 
-    # linear regression, no intercept 
+    # linear regression, no intercept
     t_lm_noint = lm(@formula(Y ~ XA +0), st_df; method=dmethod)
     @test isapprox(st_df.CooksD_noint, cooksdistance(t_lm_noint))
 
@@ -102,20 +102,20 @@ end
     @test_throws ArgumentError isapprox(st_df.CooksD_base, cooksdistance(t_lm_colli))
 end
 
-@testset "Linear model with weights and $dmethod" for dmethod in (:cholesky, :qr) 
+@testset "Linear model with weights and $dmethod" for dmethod in (:cholesky, :qr)
     df = dataset("quantreg", "engel")
     N = nrow(df)
     df.weights = repeat(1:5, Int(N/5))
     f = @formula(FoodExp ~ Income)
-    
-    lm_model = lm(f, df, wts = df.weights; method=dmethod)
-    glm_model = glm(f, df, Normal(), wts = df.weights; method=dmethod)
+
+    lm_model = lm(f, df, weights = df.weights; method=dmethod)
+    glm_model = glm(f, df, Normal(), weights = df.weights; method=dmethod)
     @test isapprox(coef(lm_model), [154.35104595140706, 0.4836896390157505])
     @test isapprox(coef(glm_model), [154.35104595140706, 0.4836896390157505])
     @test isapprox(stderror(lm_model), [9.382302620120193, 0.00816741377772968])
     @test isapprox(r2(lm_model), 0.8330258148644486)
     @test isapprox(adjr2(lm_model), 0.832788298242634)
-    @test isapprox(vcov(lm_model), [88.02760245551447 -0.06772589439264813; 
+    @test isapprox(vcov(lm_model), [88.02760245551447 -0.06772589439264813;
                                     -0.06772589439264813 6.670664781664879e-5])
     @test isapprox(first(predict(lm_model)), 357.57694841780994)
     @test isapprox(loglikelihood(lm_model), -4353.946729075838)
@@ -153,7 +153,7 @@ end
     X = ModelMatrix(ModelFrame(f, dfrm)).m
     y = X * (1:size(X, 2)) + 0.1 * randn(rng, size(X, 1))
     inds = deleteat!(collect(1:length(y)), 7:8)
-    
+
     m1 = fit(LinearModel, X, y; method=dmethod)
     @test isapprox(deviance(m1), 0.12160301538297297)
     Xmissingcell = X[inds, :]
@@ -172,7 +172,7 @@ end
                                    3.9661379199401257, 5.079410103608552, 6.1944618141188625,
                                    0.0, 7.930328728005131, 8.879994918604757, 2.986388408421915, 
                                    10.84972230524356, 11.844809275711485])
-        
+
         @test isa(m2p_dep_pos.pp.chol, CholeskyPivoted)
         @test isa(m2p_dep_pos_kw.pp.chol, CholeskyPivoted)
     elseif dmethod == :qr
@@ -186,7 +186,7 @@ end
                                    3.9661379199401257, 5.079410103608552, 6.1944618141188625,
                                    0.0, 7.930328728005131, 8.879994918604757, 2.986388408421915, 
                                    10.84972230524356, 11.844809275711485])
-        
+
         @test isa(m2p.pp.qr, QRPivoted)
 
         @test isa(m2p_dep_pos.pp.qr, QRPivoted)
@@ -198,7 +198,7 @@ end
 
     @test GLM.linpred_rank(m2p.pp) == 11
     @test isapprox(deviance(m2p), 0.1215758392280204)
-    
+
     @test GLM.linpred_rank(m2p_dep_pos.pp) == GLM.linpred_rank(m2p.pp)
     @test isapprox(deviance(m2p_dep_pos), deviance(m2p))
     @test isapprox(coef(m2p_dep_pos), coef(m2p))
@@ -210,7 +210,7 @@ end
 
 @testset "Saturated linear model with $dmethod" for dmethod in (:cholesky, :qr)
     df1 = DataFrame(x=["a", "b", "c"], y=[1, 2, 3])
-    
+
     model = lm(@formula(y ~ x), df1; method=dmethod)
     ct = coeftable(model)
     @test dof_residual(model) == 0
@@ -279,7 +279,7 @@ end
         # https://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/NoInt1.dat
 
         data = DataFrame(x = 60:70, y = 130:140)
-        
+
         mdl = lm(@formula(y ~ 0 + x), data; method=:cholesky)
         @test coef(mdl) ≈ [2.07438016528926]
         @test stderror(mdl) ≈ [0.165289256198347E-01]
@@ -303,7 +303,7 @@ end
         # https://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/NoInt2.dat
 
         data = DataFrame(x = [4, 5, 6], y = [3, 4, 4])
-        
+
         mdl = lm(@formula(y ~ 0 + x), data; method=:cholesky)
         @test coef(mdl) ≈ [0.727272727272727]
         @test stderror(mdl) ≈ [0.420827318078432E-01]
@@ -324,7 +324,7 @@ end
         X = [4 5 6]'
         y = [3, 4, 4]
         data = DataFrame(x = [4, 5, 6], y = [3, 4, 4])
-    
+
         mdl1 = lm(@formula(y ~ 0 + x), data; method=dmethod)
         mdl2 = lm(X, y)
 
@@ -523,7 +523,7 @@ end
 @testset "Poisson LogLink offset with weights with $dmethod" for dmethod in (:cholesky, :qr)
     gm7pw = fit(GeneralizedLinearModel, @formula(round(Postwt) ~ 1 + Prewt + Treat), anorexia,
                 Poisson(), LogLink(), method=dmethod, offset=log.(anorexia.Prewt),
-                wts=repeat(1:4, outer=18), rtol=1e-8)
+                weights=repeat(1:4, outer=18), rtol=1e-8)
 
     @test GLM.cancancel(gm7pw.rr)
     test_show(gm7pw)
@@ -622,7 +622,7 @@ admit_agr = DataFrame(count = [28., 97, 93, 55, 33, 54, 28, 12],
 @testset "Aggregated Binomial LogitLink with $dmethod" for dmethod in (:cholesky, :qr)
     for distr in (Binomial, Bernoulli)
         gm14 = fit(GeneralizedLinearModel, @formula(admit ~ 1 + rank), admit_agr, distr();
-                   method=dmethod, wts=Array(admit_agr.count))
+                   method=dmethod, weights=Array(admit_agr.count))
         @test dof(gm14) == 4
         @test nobs(gm14) == 400
         @test isapprox(deviance(gm14), 474.9667184280627)
@@ -645,7 +645,7 @@ admit_agr2.p = admit_agr2.admit ./ admit_agr2.count
 ## The model matrix here is singular so tests like the deviance are just round off error
 @testset "Binomial LogitLink aggregated with $dmethod" for dmethod in (:cholesky, :qr)
     gm15 = fit(GeneralizedLinearModel, @formula(p ~ rank), admit_agr2, Binomial(),
-               method=dmethod, wts=admit_agr2.count)
+               method=dmethod, weights=admit_agr2.count)
     test_show(gm15)
     @test dof(gm15) == 4
     @test nobs(gm15) == 400
@@ -662,7 +662,7 @@ end
 # Weighted Gamma example (weights are totally made up)
 @testset "Gamma InverseLink Weights with $dmethod" for dmethod in (:cholesky, :qr)
     gm16 = fit(GeneralizedLinearModel, @formula(lot1 ~ 1 + u), clotting, Gamma(),
-               method=dmethod, wts=[1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7])
+               method=dmethod, weights=[1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7])
     test_show(gm16)
     @test dof(gm16) == 3
     @test nobs(gm16) == 32.7
@@ -679,7 +679,7 @@ end
 # Weighted Poisson example (weights are totally made up)
 @testset "Poisson LogLink Weights with $dmethod" for dmethod in (:cholesky, :qr)
     gm17 = glm(@formula(Counts ~ Outcome + Treatment), dobson, Poisson(),
-               method=dmethod, wts = [1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7])
+               method=dmethod, weights = [1.5,2.0,1.1,4.5,2.4,3.5,5.6,5.4,6.7])
     test_show(gm17)
     @test dof(gm17) == 5
     @test isapprox(deviance(gm17), 17.699857821414266)
@@ -769,8 +769,8 @@ end
 
 @testset "Weighted NegativeBinomial LogLink, θ to be estimated with Cholesky" begin
     halfn = round(Int, 0.5*size(quine, 1))
-    wts   = vcat(fill(0.8, halfn), fill(1.2, size(quine, 1) - halfn))
-    gm20a = negbin(@formula(Days ~ Eth+Sex+Age+Lrn), quine, LogLink(); wts=wts)
+    weights = vcat(fill(0.8, halfn), fill(1.2, size(quine, 1) - halfn))
+    gm20a = negbin(@formula(Days ~ Eth+Sex+Age+Lrn), quine, LogLink(); weights=weights)
     test_show(gm20a)
     @test dof(gm20a) == 8
     @test isapprox(deviance(gm20a), 164.45910399188858, rtol = 1e-7)
@@ -892,7 +892,7 @@ end
     # Poisson with categorical predictors, weights and offset
     nointglm3 = fit(GeneralizedLinearModel, @formula(round(Postwt) ~ 0 + Prewt + Treat), anorexia,
                     Poisson(), LogLink(); offset=log.(anorexia.Prewt),
-                    wts=repeat(1:4, outer=18), rtol=1e-8, dropcollinear=false)
+                    weights=repeat(1:4, outer=18), rtol=1e-8, dropcollinear=false)
     @test !hasintercept(nointglm3)
     @test GLM.cancancel(nointglm3.rr)
     test_show(nointglm3)
@@ -947,7 +947,7 @@ end
     # Poisson with categorical predictors, weights and offset
     nointglm3 = fit(GeneralizedLinearModel, @formula(round(Postwt) ~ 0 + Prewt + Treat), anorexia,
                     Poisson(), LogLink(); method=dmethod, offset=log.(anorexia.Prewt),
-                    wts=repeat(1:4, outer=18), rtol=1e-8, dropcollinear=false)
+                    weights=repeat(1:4, outer=18), rtol=1e-8, dropcollinear=false)
     @test !hasintercept(nointglm3)
     @test GLM.cancancel(nointglm3.rr)
     test_show(nointglm3)
@@ -1649,14 +1649,14 @@ end
         lm4 = lm(view(x, :, :), view(y, :); method=dmethod)
         @test coef(lm1) == coef(lm2) == coef(lm3) == coef(lm4)
 
-        lm5 = lm(x, y, wts=w, method=dmethod)
-        lm6 = lm(x, view(y, :), method=dmethod, wts=w)
-        lm7 = lm(view(x, :, :), y, method=dmethod, wts=w)
-        lm8 = lm(view(x, :, :), view(y, :), method=dmethod, wts=w)
-        lm9 = lm(x, y, method=dmethod, wts=view(w, :))
-        lm10 = lm(x, view(y, :), method=dmethod, wts=view(w, :))
-        lm11 = lm(view(x, :, :), y, method=dmethod, wts=view(w, :))
-        lm12 = lm(view(x, :, :), view(y, :), method=dmethod, wts=view(w, :))
+        lm5 = lm(x, y, weights=w, method=dmethod)
+        lm6 = lm(x, view(y, :), method=dmethod, weights=w)
+        lm7 = lm(view(x, :, :), y, method=dmethod, weights=w)
+        lm8 = lm(view(x, :, :), view(y, :), method=dmethod, weights=w)
+        lm9 = lm(x, y, method=dmethod, weights=view(w, :))
+        lm10 = lm(x, view(y, :), method=dmethod, weights=view(w, :))
+        lm11 = lm(view(x, :, :), y, method=dmethod, weights=view(w, :))
+        lm12 = lm(view(x, :, :), view(y, :), method=dmethod, weights=view(w, :))
         @test coef(lm5) == coef(lm6) == coef(lm7) == coef(lm8) == coef(lm9) == coef(lm10) ==
             coef(lm11) == coef(lm12)
 
@@ -1667,14 +1667,14 @@ end
         glm4 = glm(view(x, :, :), view(y, :), Binomial(), method=dmethod)
         @test coef(glm1) == coef(glm2) == coef(glm3) == coef(glm4)
 
-        glm5 = glm(x, y, Binomial(), method=dmethod, wts=w)
-        glm6 = glm(x, view(y, :), Binomial(), method=dmethod, wts=w)
-        glm7 = glm(view(x, :, :), y, Binomial(), method=dmethod, wts=w)
-        glm8 = glm(view(x, :, :), view(y, :), Binomial(), method=dmethod, wts=w)
-        glm9 = glm(x, y, Binomial(), method=dmethod, wts=view(w, :))
-        glm10 = glm(x, view(y, :), Binomial(), method=dmethod, wts=view(w, :))
-        glm11 = glm(view(x, :, :), y, Binomial(), method=dmethod, wts=view(w, :))
-        glm12 = glm(view(x, :, :), view(y, :), Binomial(), method=dmethod, wts=view(w, :))
+        glm5 = glm(x, y, Binomial(), method=dmethod, weights=w)
+        glm6 = glm(x, view(y, :), Binomial(), method=dmethod, weights=w)
+        glm7 = glm(view(x, :, :), y, Binomial(), method=dmethod, weights=w)
+        glm8 = glm(view(x, :, :), view(y, :), Binomial(), method=dmethod, weights=w)
+        glm9 = glm(x, y, Binomial(), method=dmethod, weights=view(w, :))
+        glm10 = glm(x, view(y, :), Binomial(), method=dmethod, weights=view(w, :))
+        glm11 = glm(view(x, :, :), y, Binomial(), method=dmethod, weights=view(w, :))
+        glm12 = glm(view(x, :, :), view(y, :), Binomial(), method=dmethod, weights=view(w, :))
         @test coef(glm5) == coef(glm6) == coef(glm7) == coef(glm8) == coef(glm9) == coef(glm10) ==
             coef(glm11) == coef(glm12)
     end
@@ -2036,8 +2036,8 @@ end
     # > data(Duncan)
     # > lm1 = lm(prestige ~ 1 + income + education, Duncan)
     # > vif(lm1)
-    #    income education 
-    #    2.1049    2.1049 
+    #    income education
+    #    2.1049    2.1049
     # > lm2 = lm(prestige ~ 1 + income + education + type, Duncan)
     # > vif(lm2)
     #               GVIF Df GVIF^(1/(2*Df))
@@ -2048,26 +2048,26 @@ end
     lm1 = lm(@formula(Prestige ~ 1 + Income + Education), duncan)
     @test termnames(lm1)[2] == coefnames(lm1)
     @test vif(lm1) ≈ gvif(lm1)
-    
+
     lm1_noform = lm(modelmatrix(lm1), response(lm1))
     @test vif(lm1) ≈ vif(lm1_noform)
     @test_throws ArgumentError("model was fitted without a formula") gvif(lm1_noform)
-    
+
     lm1log = lm(@formula(Prestige ~ 1 + exp(log(Income)) + exp(log(Education))), duncan)
     @test termnames(lm1log)[2] == coefnames(lm1log) == ["(Intercept)", "exp(log(Income))", "exp(log(Education))"]
     @test vif(lm1) ≈ vif(lm1log)
-    
+
     gm1 = glm(modelmatrix(lm1), response(lm1), Normal())
     @test vif(lm1) ≈ vif(gm1)
-    
+
     lm2 = lm(@formula(Prestige ~ 1 + Income + Education + Type), duncan)
     @test termnames(lm2)[2] != coefnames(lm2)
     @test gvif(lm2; scale=true) ≈ [1.486330, 2.301648, 1.502666] atol=1e-4
-    
+
     gm2 = glm(@formula(Prestige ~ 1 + Income + Education + Type), duncan, Normal())
     @test termnames(gm2)[2] != coefnames(gm2)
-    @test gvif(gm2; scale=true) ≈ [1.486330, 2.301648, 1.502666] atol=1e-4   
-    
+    @test gvif(gm2; scale=true) ≈ [1.486330, 2.301648, 1.502666] atol=1e-4
+
     # the VIF definition depends on modelmatrix, vcov and stderror returning valid
     # values. It doesn't care about links, offsets, etc. as long as the model matrix,
     # vcov matrix and stderrors are well defined.