diff --git a/Project.toml b/Project.toml index 68193191d..c057fd3b4 100644 --- a/Project.toml +++ b/Project.toml @@ -29,7 +29,7 @@ Wavelets = "29a6e085-ba6d-5f35-a997-948ac2efa89a" [compat] ChaosTools = "^2.0" -DelayEmbeddings = "^2.0" +DelayEmbeddings = "^2.0.2" Distances = "^0.10" Distributions = "^0.24, 0.25" DynamicalSystemsBase = "^2.0" @@ -41,7 +41,6 @@ Neighborhood = "0.2.3" Reexport = "0.2, 1" Requires = "^1" SimpleDiffEq = "^1" -StaticArrays = "^1" StatsBase = "^0.33" TimeseriesSurrogates = "^1.2, 2" TransferEntropy = "^1.7" diff --git a/docs/make.jl b/docs/make.jl index 7da8a31be..7af75ebd2 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -50,6 +50,9 @@ PAGES = [ "generalized_entropy.md", "info_estimators.md", ], + "Exploratory" => [ + "leanings.md" + ], "example_systems.md", "Utilities" => [ diff --git a/docs/src/index.md b/docs/src/index.md index 9423d61fa..b95de03b0 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -36,6 +36,10 @@ We also provide estimators for generalized entropy and mutual information, thoug - Generalized entropy ([`genentropy`](@ref)) and fast histograms ([`binhist`](@ref)) - Mutual information ([`mutualinfo`](@ref)) +### Exploratory methods + +- [Penchants and leanings](@ref leanings). + ## Contributions/issues Do you want additional methods or example systems to be implemented? Make a PR to the diff --git a/docs/src/leanings.md b/docs/src/leanings.md new file mode 100644 index 000000000..b75ca0aa0 --- /dev/null +++ b/docs/src/leanings.md @@ -0,0 +1,10 @@ +# [Penchants and leanings](@id leanings) + +Penchants and leanings are probability-based exploratory causal inference measures that rely on the estimation of probabilities directly from the data. Here, we provide implementations of the *penchants and leanings* +algoriths from McCracken and Weigel (2016). + +```@docs +penchant +lean +``` + diff --git a/src/CausalityTools.jl b/src/CausalityTools.jl index 9a5c11d4d..9dbbb8f36 100644 --- a/src/CausalityTools.jl +++ b/src/CausalityTools.jl @@ -15,6 +15,7 @@ module CausalityTools include("CrossMappings/CrossMappings.jl") include("SMeasure/smeasure.jl") include("PredictiveAsymmetry/PredictiveAsymmetry.jl") + include("Leanings/Leanings.jl") include("example_systems/ExampleSystems.jl") using Requires diff --git a/src/Leanings/Leanings.jl b/src/Leanings/Leanings.jl new file mode 100644 index 000000000..20858c23c --- /dev/null +++ b/src/Leanings/Leanings.jl @@ -0,0 +1,5 @@ +using Reexport + +@reexport module Leanings + include("penchants.jl") +end \ No newline at end of file diff --git a/src/Leanings/example.jl b/src/Leanings/example.jl new file mode 100644 index 000000000..6c8201ff9 --- /dev/null +++ b/src/Leanings/example.jl @@ -0,0 +1,57 @@ +using CausalityTools, DynamicalSystems, Plots, Statistics, Distributions; gr() + +npts, Ttr = 10000, 5000 +x, y = columns(trajectory(henon2(c_xy = 0.0), npts - 1, Ttr = Ttr)) +xr, yr = rand(Uniform(-1, 1), npts), rand(Uniform(-1, 1), npts) +plot() +plot!(x, label = "x") +plot!(y, label = "y") + +import Entropies: symbolize, SymbolicPermutation + +m = 2; τ = -1 +n = 5 +weighted = false +nreps = 50 +ls = 0:10 +leans_wn = zeros(nreps, length(ls)) +leans_henon_c0 = zeros(nreps, length(ls)) +leans_henon_c2 = zeros(nreps, length(ls)) + +function partition(x, n::Int) + xmin = minimum(x) # the reference point + Δx = (maximum(x) - xmin) / n + [floor(Int, (xᵢ - xmin) / Δx) for xᵢ in x] +end + +for i = 1:nreps + xr, yr = rand(Uniform(-1, 1), npts), rand(Uniform(-1, 1), npts) + X = partition(xr, n)#symbolize(xr, SymbolicPermutation(m = m, τ = τ)) + Y = partition(yr, n)#symbolize(yr, SymbolicPermutation(m = m, τ = τ)) + for (j, l) in enumerate(ls) + leans_wn[i, j] = lean(X, Y, l, weighted = weighted) + end + + x, y = columns(trajectory(henon2(c_xy = 0.0), npts - 1, Ttr = Ttr)) + X = partition(x, n)#symbolize(x, SymbolicPermutation(m = m, τ = τ)) + Y = partition(y, n)#symbolize(y, SymbolicPermutation(m = m, τ = τ)) + for (j, l) in enumerate(ls) + leans_henon_c0[i, j] = lean(X, Y, l, weighted = weighted) + end + + x, y = columns(trajectory(henon2(c_xy = 2.0), npts - 1, Ttr = Ttr)) + X = partition(x, n)#symbolize(x, SymbolicPermutation(m = m, τ = τ)) + Y = partition(y, n)#symbolize(y, SymbolicPermutation(m = m, τ = τ)) + for (j, l) in enumerate(ls) + leans_henon_c2[i, j] = lean(X, Y, l, weighted = weighted) + end +end + +mean_leans_wn = dropdims(mean(leans_wn, dims = 1), dims = 1) +mean_leans_henon_c0 = dropdims(mean(leans_henon_c0, dims = 1), dims = 1) +mean_leans_henon_c2 = dropdims(mean(leans_henon_c2, dims = 1), dims = 1) + +plot(xlabel = "l", ylabel = "λx→y", ylims = (-1.1, 1.1)) +#plot!(ls, mean_leans_wn, label = "white noise", marker = :star) +plot!(ls, mean_leans_henon_c0, label = "henon (c_xy = 0.0)", marker = :circle) +#plot!(ls, mean_leans_henon_c2, label = "henon (c_xy = 2.0)", marker = :star5) \ No newline at end of file diff --git a/src/Leanings/penchants.jl b/src/Leanings/penchants.jl new file mode 100644 index 000000000..662792d7b --- /dev/null +++ b/src/Leanings/penchants.jl @@ -0,0 +1,184 @@ + +export penchant, lean + +using Statistics + +# We just assume that the time series have been pre-binned or symbolized +# However, `get_states`` might turn out much more complicated if we decide +# to implement more complicated cause-effect assignments +get_states(x, y) = unique([x; y]) + +# Getting the penchants might also become more complex for other +# complicated cause-effect assignments, so separate it out in a function. +function get_penchants(x, y) + states = get_states(x, y) + + # All possible 2-permutations (with repetitions) of the unique + # states in `x ∪ y`. + return Iterators.product(states, states) +end + +function penchant_counts(x, y, l = 1) + @assert length(x) == length(y) + n = length(x) + + # Leave reasonably many for probability computations. `l` can't be too large. + @assert l < n ÷ 2 + + # Number of time the state y_t = 1 AND x_{t-1} = 1 appears in {x, y} + ns_ec = Vector{Int}(undef, 0) + ns_c = Vector{Int}(undef, 0) + ns_e = Vector{Int}(undef, 0) + + # For each possible penchant, count occurrences. + penchants = get_penchants(x, y) + + @inbounds for penchant in penchants + state_cause, state_effect = penchant[1], penchant[2] + + # Number of times the assumed cause has appeared + n_c = 0 + + # Number of times the assumed effect has appeared + n_e = 0 + + # Number of times the cause AND effect has appeared simulaneously + n_ec = 0 + + for t = l+1:n + # "we required that the cause must precede the effect". This is the + # standard l-assignment + effect = y[t] + cause = x[t-l] + + if effect == state_effect && cause == state_cause + n_ec += 1 + end + + if effect == state_effect + n_e += 1 + end + + if cause == state_cause + n_c += 1 + end + end + push!(ns_ec, n_ec) + push!(ns_c, n_c) + push!(ns_e, n_e) + end + + return ns_c, ns_e, ns_ec +end + + +function mean_observed_ρ(ρs, κs, ns_ec, L, weighted = false) + ρ = 0.0 + ct = 0 + @inbounds for i = 1:length(κs) + if κs[i] != 0.0 + if weighted + ρ += ρs[i] * ns_ec[i] + else + ρ += ρs[i] + end + ct += 1 + end + end + + if weighted + return ρ / L + else + return ρ / ct + end +end + +""" + penchant(x, y, l; weighted = false) → ρ̄ + +Computes the *mean observed penchant* `ρ̄` (McCracken & Weigel, 2016)[^McCrackenWeigel2016] +of the `l`-assignment ``\\{\\text{C}, \\text{E}\\} = \\{\\text{cause}, \\text{effect}\\} =\\{x_{t-l}, y_t\\}``. + +If `weighted == true`, then compute the *weighted mean observed penchant*. + +## Definition + +The *causal penchant* , or causal tendency, is a causal indicator defined as +``\\rho_{CE} = P(E|C) - P(E|\\bar{C})``, where ``\\rho_{CE} \\in [-1, 1]`` (McCracken & Weigel, 2016). +If ``\\rho_{CE} > 0``, then ``C`` causes or drives ``E``, and if +``\\rho_{CE} \\leq 0``, then the direction of influence is undetermined. + +A direct formula for ``\\rho_{CE}`` can be obtained using Bayes' theorem. + +```math +P(E|C) = P(C|E) \\dfrac{P(E)}{P(C)} +``` +Using the definitions of probability complements, one arrives at the following +expression (see the original paper for a detailed derivation): + +```math +\\rho_{CE} = P(E|C) \\left[1 + \\dfrac{P(C)}{1-P(C)} \\right] - \\dfrac{P(E)}{1 - P(C)}. +``` + +Applying appropriate discretization schemes, these probabilities can be estimated +directly from time series using simple counting, which makes the method fast and +well-suited for exploratory causal inference analysis. + +See also [`lean`](@ref) and data requirements discussed therein. + +[^McCrackenWeigel2016]: McCracken, J. M., & Weigel, R. S. (2016). Nonparametric causal inference for bivariate time series. Physical Review E, 93(2), 022207. +""" +function penchant(x, y, l = 1; weighted = true) + n = length(x) + ns_c, ns_e, ns_ec = penchant_counts(x, y, l) + + L = n - l # "library length", resulting from shifting the time series l steps + Ps_e = ns_e ./ L + Ps_c = ns_c ./ L + κs = ns_ec ./ ns_c + ρs = κs.* (1 .+ Ps_c ./ (1 .- Ps_c)) .- (Ps_e) ./ (1 .- Ps_c) + + # Mean observed penchant. We can determine which penchants are observed + # by looking for zeros in `κs`. If `κs[i] == 0`, then the `i`-th penchant + # is unobserved. Why? When the penchant is unobserved, there are no + # instances of the cause and effect occurring together, so + # `P(effect | cause) = P(effect, cause) / P(cause) = 0 / P(cause) = 0` + # As explained in section D in the paper, including unobserved penchants + # would be meaningless for causal inference. + mean_observed_ρ(ρs, κs, ns_ec, L, weighted) +end + +""" + + lean(x, y, l = 1; weighted = true) → ρ̄ + +Compute the *mean observed leaning* `λ ∈ [-2, 2]` (McCracken & Weigel, 2016)[^McCrackenWeigel2016] +between scalar time series `x` and `y`. + +If `ρ̄ > 0`, then the probability that `x` drives `y` is higher than the probability +that `y` drives `x`. Vice versa, if `ρ̄ > 0`, then the probability that `y` drives `x` +is higher than the probability that `x` drives `y`. + +## Weighting + +If `weighted == true`, then each penchant is weighted by the number of times it appears in +the data, which yields the *weighted mean observed leaning*. This the default behavior, since +"...[the weighted mean observed leaning] accounts for the frequency of observed cause-effect pairs +within the data, which is assumed to be a predictor of causal influence" (McCracken & Weigel, 2016). + +## Data requirements + +Both `x` and `y` *must* be discrete, and the number discrete states can't be too high +relative to the number of points in `x` and `y`. Otherwise, probabilities cannot be reliably estimated. +If the function returns `NaN`, then you probably either haven't discretized your time series, +or the partition is too fine-grained given the numbers of points available. + +Hence, this implementation *does not* take care of the tolerance domains discussed in the paper. +Pre-process your time series using appropriate binning or symbolization schemes. + +[^McCrackenWeigel2016]: McCracken, J. M., & Weigel, R. S. (2016). Nonparametric causal inference for bivariate time series. Physical Review E, 93(2), 022207. +""" +function lean(x, y, l = 1; weighted = true) + return penchant(x, y, l, weighted = weighted) - penchant(y, x, l, weighted = weighted) +end + diff --git a/src/example_systems/discretemaps/nonlinear_sindriver_2d.jl b/src/example_systems/discretemaps/nonlinear_sindriver_2d.jl new file mode 100644 index 000000000..ead84e60a --- /dev/null +++ b/src/example_systems/discretemaps/nonlinear_sindriver_2d.jl @@ -0,0 +1,34 @@ +function eom_nonlinear_sindriver_2d(dx, x, p, n) + a, b, c, t, Δt = (p...,) + x, y = x[1], x[2] + 𝒩 = Normal(0, 1) + + dx[1] = sin(t) + dx[2] = a*x * (1 - b*x) + c*rand(𝒩) + p[end-1] += Δt # update t + + return +end + +""" + nonlinear_sindriver_2d(;u₀ = 0, a = 1.0, b = 1.0, c = 2.0, Δt = 1) + +A discrete, nonlinear 2D system from McCracken & Weigel (2016)[^McCrackenWeigel2016], +governed by the following equations of motion: + +```math +\\begin{aligned} +x_{t+1} = \\sin(t) \\\\ +y(t+1) &= a x_t (1 - B x_t) + C \\eta_t +\\end{aligned} +``` + +where ``\\eta_t \\sim \\mathcal{N}(\\mu = 0, \\sigma = 1)``, ``A, B, C \\in [0, 1]``. + +[^McCrackenWeigel2016]: McCracken, J. M., & Weigel, R. S. (2016). Nonparametric causal inference for bivariate time series. Physical Review E, 93(2), 022207. +""" +function nonlinear_sindriver_2d(;u₀ = 0, a = 1.0, b = 1.0, c = 2.0, Δt = (1/30*π)) + @assert a >= 0; @assert b >= 0; @assert c >= 0 + + DiscreteDynamicalSystem(eom_nonlinear_sindriver2d, [0, u₀], [a, b, c, 0, Δt]) +end \ No newline at end of file diff --git a/test/methods/test_leanings.jl b/test/methods/test_leanings.jl new file mode 100644 index 000000000..8cd1cb682 --- /dev/null +++ b/test/methods/test_leanings.jl @@ -0,0 +1,12 @@ + +@testset "Leanings" begin + x = [0, 0, 1, 0, 0, 1, 0, 0, 1, 0] + y = [0, 0, 0, 1, 0, 0, 1, 0, 0, 1] + @test penchant(x, y, 1, weighted = false) ≈ 1.0 + @test penchant(y, x, 1, weighted = false) ≈ 1/7 + + @test penchant(x, y, 1, weighted = true) ≈ 1.0 + @test penchant(y, x, 1, weighted = true) ≈ 3/63 + + @test lean(x, y, 1, weighted = true) ≈ 60/63 +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 58099aebe..1c3c61e7a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -8,6 +8,7 @@ include("methods/test_smeasure.jl") include("methods/test_joint_distance_distribution.jl") include("methods/test_predictive_asymmetry.jl") include("methods/test_crossmapping.jl") +include("methods/test_leanings.jl") include("systems/discrete/test_discrete_systems.jl") include("systems/continuous/test_continuous_systems.jl")