Penchants and leanings #153

Draft · wants to merge 30 commits into base `main`
c10ed32
Draft of Leanings module
kahaaga Nov 30, 2021
1711df7
Leanings, with tests and docs
kahaaga Dec 1, 2021
677a676
Correct function name
kahaaga Dec 1, 2021
208e4a5
Use testset
kahaaga Dec 1, 2021
a1c620b
Remove example arrays
kahaaga Dec 1, 2021
1b0cd8d
Add return statement
kahaaga Dec 1, 2021
0310e53
Add data requirement to docstring
kahaaga Dec 1, 2021
92b561e
`weighted` is a keyword
kahaaga Dec 1, 2021
944816a
Update test
kahaaga Dec 1, 2021
ab7447c
Simplify and shorten function names
kahaaga Dec 1, 2021
1b8d401
Docstring fix
kahaaga Dec 1, 2021
28920fc
Change function names in docs
kahaaga Dec 1, 2021
7c5d6cc
Update version
kahaaga Dec 1, 2021
c790973
Exclude failing tests for now (they are not relevant for this PR)
kahaaga Dec 1, 2021
78ea28f
Rename variables for clarity
kahaaga Dec 1, 2021
16c06d6
Documentation improvements
kahaaga Dec 1, 2021
7b74daf
Specify possible ranges for penchants and leanings
kahaaga Dec 1, 2021
d916cd7
Add test for lean
kahaaga Dec 1, 2021
bfec698
Definition of causal penchant
kahaaga Dec 1, 2021
2d19990
Rephrase docs
kahaaga Dec 1, 2021
4c9fbeb
All tests involving Dataset construction fails, reintroduce the file
kahaaga Dec 1, 2021
096faba
Trigger new CI run
kahaaga Dec 1, 2021
f7c3591
Require DelayEmbeddings 2.0.2 (breaks on Julia 1.7 otherwise)
kahaaga Dec 1, 2021
e21791f
Change heading in menu
kahaaga Dec 1, 2021
2d29f3c
Latex typo
kahaaga Dec 1, 2021
9bdb54b
Add penchants and leanings to overview
kahaaga Dec 1, 2021
235957a
`weighted` should be true by default for both `lean` and `penchant`
kahaaga Dec 1, 2021
9042531
example
kahaaga Dec 1, 2021
ce542de
Add example system
kahaaga Dec 6, 2021
c8607c8
Merge branch 'master' into leanings
kahaaga May 21, 2022
3 changes: 1 addition & 2 deletions Project.toml
@@ -29,7 +29,7 @@ Wavelets = "29a6e085-ba6d-5f35-a997-948ac2efa89a"

[compat]
ChaosTools = "^2.0"
DelayEmbeddings = "^2.0"
DelayEmbeddings = "^2.0.2"
Distances = "^0.10"
Distributions = "^0.24, 0.25"
DynamicalSystemsBase = "^2.0"
@@ -41,7 +41,6 @@ Neighborhood = "0.2.3"
Reexport = "0.2, 1"
Requires = "^1"
SimpleDiffEq = "^1"
StaticArrays = "^1"
StatsBase = "^0.33"
TimeseriesSurrogates = "^1.2, 2"
TransferEntropy = "^1.7"
3 changes: 3 additions & 0 deletions docs/make.jl
@@ -50,6 +50,9 @@ PAGES = [
"generalized_entropy.md",
"info_estimators.md",
],
"Exploratory" => [
"leanings.md"
],

"example_systems.md",
"Utilities" => [
4 changes: 4 additions & 0 deletions docs/src/index.md
@@ -36,6 +36,10 @@ We also provide estimators for generalized entropy and mutual information, thoug
- Generalized entropy ([`genentropy`](@ref)) and fast histograms ([`binhist`](@ref))
- Mutual information ([`mutualinfo`](@ref))

### Exploratory methods

- [Penchants and leanings](@ref leanings).

## Contributions/issues

Do you want additional methods or example systems to be implemented? Make a PR to the
10 changes: 10 additions & 0 deletions docs/src/leanings.md
@@ -0,0 +1,10 @@
# [Penchants and leanings](@id leanings)

Penchants and leanings are probability-based exploratory causal inference measures that estimate probabilities directly from the data. Here, we provide implementations of the *penchants and leanings*
algorithms from McCracken and Weigel (2016).

```@docs
penchant
lean
```
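
A minimal usage sketch (the inputs here are already binary, so no extra discretization is needed; the expected values follow the package's own tests):

```julia
using CausalityTools

# Two pre-discretized (binary) time series, where y repeats x with a one-step lag.
x = [0, 0, 1, 0, 0, 1, 0, 0, 1, 0]
y = [0, 0, 0, 1, 0, 0, 1, 0, 0, 1]

penchant(x, y, 1) # ≈ 1.0: the 1-assignment {x_{t-1}, y_t} is maximally consistent
lean(x, y, 1)     # > 0: evidence that x drives y rather than the other way around
```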

1 change: 1 addition & 0 deletions src/CausalityTools.jl
@@ -15,6 +15,7 @@ module CausalityTools
include("CrossMappings/CrossMappings.jl")
include("SMeasure/smeasure.jl")
include("PredictiveAsymmetry/PredictiveAsymmetry.jl")
include("Leanings/Leanings.jl")
include("example_systems/ExampleSystems.jl")

using Requires
5 changes: 5 additions & 0 deletions src/Leanings/Leanings.jl
@@ -0,0 +1,5 @@
using Reexport

@reexport module Leanings
include("penchants.jl")
end
57 changes: 57 additions & 0 deletions src/Leanings/example.jl
@@ -0,0 +1,57 @@
using CausalityTools, DynamicalSystems, Plots, Statistics, Distributions; gr()

npts, Ttr = 10000, 5000
x, y = columns(trajectory(henon2(c_xy = 0.0), npts - 1, Ttr = Ttr))
xr, yr = rand(Uniform(-1, 1), npts), rand(Uniform(-1, 1), npts)
plot()
plot!(x, label = "x")
plot!(y, label = "y")

n = 5
weighted = false
nreps = 50
ls = 0:10
leans_wn = zeros(nreps, length(ls))
leans_henon_c0 = zeros(nreps, length(ls))
leans_henon_c2 = zeros(nreps, length(ls))

function partition(x, n::Int)
    xmin = minimum(x) # the reference point
    Δx = (maximum(x) - xmin) / n
    # Clamp so the maximum lands in bin n - 1 instead of an extra bin n.
    [min(floor(Int, (xᵢ - xmin) / Δx), n - 1) for xᵢ in x]
end

for i = 1:nreps
xr, yr = rand(Uniform(-1, 1), npts), rand(Uniform(-1, 1), npts)
X = partition(xr, n)
Y = partition(yr, n)
for (j, l) in enumerate(ls)
leans_wn[i, j] = lean(X, Y, l, weighted = weighted)
end

x, y = columns(trajectory(henon2(c_xy = 0.0), npts - 1, Ttr = Ttr))
X = partition(x, n)
Y = partition(y, n)
for (j, l) in enumerate(ls)
leans_henon_c0[i, j] = lean(X, Y, l, weighted = weighted)
end

x, y = columns(trajectory(henon2(c_xy = 2.0), npts - 1, Ttr = Ttr))
X = partition(x, n)
Y = partition(y, n)
for (j, l) in enumerate(ls)
leans_henon_c2[i, j] = lean(X, Y, l, weighted = weighted)
end
end

mean_leans_wn = dropdims(mean(leans_wn, dims = 1), dims = 1)
mean_leans_henon_c0 = dropdims(mean(leans_henon_c0, dims = 1), dims = 1)
mean_leans_henon_c2 = dropdims(mean(leans_henon_c2, dims = 1), dims = 1)

plot(xlabel = "l", ylabel = "λx→y", ylims = (-1.1, 1.1))
plot!(ls, mean_leans_wn, label = "white noise", marker = :star)
plot!(ls, mean_leans_henon_c0, label = "henon (c_xy = 0.0)", marker = :circle)
plot!(ls, mean_leans_henon_c2, label = "henon (c_xy = 2.0)", marker = :star5)
184 changes: 184 additions & 0 deletions src/Leanings/penchants.jl
@@ -0,0 +1,184 @@

export penchant, lean

using Statistics

# We just assume that the time series have been pre-binned or symbolized.
# However, `get_states` might turn out much more complicated if we decide
# to implement more complicated cause-effect assignments.
get_states(x, y) = unique([x; y])

# Getting the penchants might also become more complex for other
# complicated cause-effect assignments, so we separate it out into a function.
function get_penchants(x, y)
states = get_states(x, y)

# All possible 2-permutations (with repetitions) of the unique
# states in `x ∪ y`.
return Iterators.product(states, states)
end

function penchant_counts(x, y, l = 1)
@assert length(x) == length(y)
n = length(x)

# Leave reasonably many points for the probability computations; `l` can't be too large.
@assert l < n ÷ 2

# Counts of how often each (cause, effect) state pair co-occurs (ns_ec),
# and how often each cause (ns_c) and each effect (ns_e) appears alone.
ns_ec = Vector{Int}(undef, 0)
ns_c = Vector{Int}(undef, 0)
ns_e = Vector{Int}(undef, 0)

# For each possible penchant, count occurrences.
penchants = get_penchants(x, y)

@inbounds for penchant in penchants
state_cause, state_effect = penchant[1], penchant[2]

# Number of times the assumed cause has appeared
n_c = 0

# Number of times the assumed effect has appeared
n_e = 0

# Number of times the cause AND effect have appeared simultaneously
n_ec = 0

for t = l+1:n
# "we required that the cause must precede the effect". This is the
# standard l-assignment
effect = y[t]
cause = x[t-l]

if effect == state_effect && cause == state_cause
n_ec += 1
end

if effect == state_effect
n_e += 1
end

if cause == state_cause
n_c += 1
end
end
push!(ns_ec, n_ec)
push!(ns_c, n_c)
push!(ns_e, n_e)
end

return ns_c, ns_e, ns_ec
end


function mean_observed_ρ(ρs, κs, ns_ec, L, weighted = false)
ρ = 0.0
ct = 0
@inbounds for i = 1:length(κs)
if κs[i] != 0.0
if weighted
ρ += ρs[i] * ns_ec[i]
else
ρ += ρs[i]
end
ct += 1
end
end

if weighted
return ρ / L
else
return ρ / ct
end
end

"""
    penchant(x, y, l = 1; weighted = true) → ρ̄

Compute the *mean observed penchant* `ρ̄` (McCracken & Weigel, 2016)[^McCrackenWeigel2016]
of the `l`-assignment ``\\{\\text{C}, \\text{E}\\} = \\{\\text{cause}, \\text{effect}\\} = \\{x_{t-l}, y_t\\}``.

If `weighted == true` (the default), the *weighted mean observed penchant* is computed instead.

## Definition

The *causal penchant*, or causal tendency, is a causal indicator defined as
``\\rho_{CE} = P(E|C) - P(E|\\bar{C})``, where ``\\rho_{CE} \\in [-1, 1]`` (McCracken & Weigel, 2016).
If ``\\rho_{CE} > 0``, then ``C`` causes or drives ``E``, and if
``\\rho_{CE} \\leq 0``, then the direction of influence is undetermined.

A direct formula for ``\\rho_{CE}`` can be obtained using Bayes' theorem.

```math
P(E|C) = P(C|E) \\dfrac{P(E)}{P(C)}
```
Using the definitions of probability complements, one arrives at the following
expression (see the original paper for a detailed derivation):

```math
\\rho_{CE} = P(E|C) \\left[1 + \\dfrac{P(C)}{1-P(C)} \\right] - \\dfrac{P(E)}{1 - P(C)}.
```

Applying appropriate discretization schemes, these probabilities can be estimated
directly from time series using simple counting, which makes the method fast and
well-suited for exploratory causal inference analysis.
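
As a standalone sanity check of this expression (illustrative only, not part of the package API), both forms of ``\\rho_{CE}`` can be estimated by direct counting on two short binary series:

```julia
# Standalone check (not package API): the defining form P(E|C) - P(E|C̄)
# equals the Bayes-derived expression above, for empirical probabilities.
x = [0, 1, 0, 0, 1, 1, 0, 1, 0, 1] # assumed cause
y = [0, 0, 1, 0, 0, 1, 1, 0, 1, 0] # assumed effect
l = 1
E = y[l+1:end] .== 1 # effect occurred at time t
C = x[1:end-l] .== 1 # cause occurred at time t - l

P(A) = count(A) / length(A)
Pcond(A, B) = count(A .& B) / count(B) # P(A | B)

ρ_def = Pcond(E, C) - Pcond(E, .!C)
ρ_bayes = Pcond(E, C) * (1 + P(C) / (1 - P(C))) - P(E) / (1 - P(C))
ρ_def ≈ ρ_bayes # true
```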

See also [`lean`](@ref) and data requirements discussed therein.

[^McCrackenWeigel2016]: McCracken, J. M., & Weigel, R. S. (2016). Nonparametric causal inference for bivariate time series. Physical Review E, 93(2), 022207.
"""
function penchant(x, y, l = 1; weighted = true)
n = length(x)
ns_c, ns_e, ns_ec = penchant_counts(x, y, l)

L = n - l # "library length", resulting from shifting the time series l steps
Ps_e = ns_e ./ L
Ps_c = ns_c ./ L
κs = ns_ec ./ ns_c
ρs = κs.* (1 .+ Ps_c ./ (1 .- Ps_c)) .- (Ps_e) ./ (1 .- Ps_c)

# Mean observed penchant. We can determine which penchants are observed
# by looking for zeros in `κs`. If `κs[i] == 0`, then the `i`-th penchant
# is unobserved. Why? When the penchant is unobserved, there are no
# instances of the cause and effect occurring together, so
# `P(effect | cause) = P(effect, cause) / P(cause) = 0 / P(cause) = 0`
# As explained in section D in the paper, including unobserved penchants
# would be meaningless for causal inference.
mean_observed_ρ(ρs, κs, ns_ec, L, weighted)
end

"""

lean(x, y, l = 1; weighted = true) → ρ̄

Compute the *mean observed leaning* `λ ∈ [-2, 2]` (McCracken & Weigel, 2016)[^McCrackenWeigel2016]
between scalar time series `x` and `y`.

If `ρ̄ > 0`, then the probability that `x` drives `y` is higher than the probability
that `y` drives `x`. Vice versa, if `ρ̄ > 0`, then the probability that `y` drives `x`
is higher than the probability that `x` drives `y`.

## Weighting

If `weighted == true`, then each penchant is weighted by the number of times it appears in
the data, which yields the *weighted mean observed leaning*. This is the default behavior, since
"...[the weighted mean observed leaning] accounts for the frequency of observed cause-effect pairs
within the data, which is assumed to be a predictor of causal influence" (McCracken & Weigel, 2016).

## Data requirements

Both `x` and `y` *must* be discrete, and the number of discrete states must not be too large
relative to the number of points in `x` and `y`; otherwise, probabilities cannot be reliably estimated.
If the function returns `NaN`, you probably either haven't discretized your time series,
or the partition is too fine-grained for the number of points available.

Hence, this implementation *does not* take care of the tolerance domains discussed in the paper.
Pre-process your time series using appropriate binning or symbolization schemes.
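
For instance, a simple equidistant binning (one possible discretization scheme; `bin_equidistant` is a hypothetical helper, not part of the package API) could look like:

```julia
# Discretize a real-valued series into n equidistant bins (integer states 0:n-1)
# before passing it to `lean` or `penchant`.
function bin_equidistant(x, n::Int)
    xmin, xmax = extrema(x)
    Δ = (xmax - xmin) / n
    # `min` clamps the maximum into the last bin instead of an extra bin n.
    [min(floor(Int, (xᵢ - xmin) / Δ), n - 1) for xᵢ in x]
end

x = randn(1000)
X = bin_equidistant(x, 5) # X now takes integer values in 0:4
```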

[^McCrackenWeigel2016]: McCracken, J. M., & Weigel, R. S. (2016). Nonparametric causal inference for bivariate time series. Physical Review E, 93(2), 022207.
"""
function lean(x, y, l = 1; weighted = true)
return penchant(x, y, l, weighted = weighted) - penchant(y, x, l, weighted = weighted)
end

34 changes: 34 additions & 0 deletions src/example_systems/discretemaps/nonlinear_sindriver_2d.jl
@@ -0,0 +1,34 @@
function eom_nonlinear_sindriver_2d(dx, x, p, n)
    a, b, c, t, Δt = (p...,)
    x₁ = x[1] # avoid shadowing the state vector `x`
    𝒩 = Normal(0, 1)

    dx[1] = sin(t)
    dx[2] = a*x₁ * (1 - b*x₁) + c*rand(𝒩)
    p[end-1] += Δt # update the time parameter t

    return
end

"""
    nonlinear_sindriver_2d(;u₀ = 0, a = 1.0, b = 1.0, c = 2.0, Δt = π/30)

A discrete, nonlinear 2D system from McCracken & Weigel (2016)[^McCrackenWeigel2016],
governed by the following equations of motion:

```math
\\begin{aligned}
x_{t+1} &= \\sin(t) \\\\
y_{t+1} &= a x_t (1 - b x_t) + c \\eta_t
\\end{aligned}
```

where ``\\eta_t \\sim \\mathcal{N}(\\mu = 0, \\sigma = 1)`` and ``a, b, c \\geq 0``.

[^McCrackenWeigel2016]: McCracken, J. M., & Weigel, R. S. (2016). Nonparametric causal inference for bivariate time series. Physical Review E, 93(2), 022207.
"""
function nonlinear_sindriver_2d(;u₀ = 0, a = 1.0, b = 1.0, c = 2.0, Δt = (1/30*π))
    @assert a >= 0; @assert b >= 0; @assert c >= 0

    DiscreteDynamicalSystem(eom_nonlinear_sindriver_2d, [0, u₀], [a, b, c, 0, Δt])
end
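
For reference, a dependency-free sketch that iterates these equations directly (`sindriver_trajectory` is a hypothetical helper name; the packaged version goes through `DiscreteDynamicalSystem` instead):

```julia
# Iterate x_{t+1} = sin(t), y_{t+1} = a x_t (1 - b x_t) + c η_t directly,
# with η_t standard normal and time advancing by Δt per step.
function sindriver_trajectory(npts; a = 1.0, b = 1.0, c = 2.0, Δt = π/30)
    x, y = zeros(npts), zeros(npts)
    t = 0.0
    for i in 2:npts
        x[i] = sin(t)
        y[i] = a * x[i-1] * (1 - b * x[i-1]) + c * randn()
        t += Δt
    end
    return x, y
end

x, y = sindriver_trajectory(500)
```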
12 changes: 12 additions & 0 deletions test/methods/test_leanings.jl
@@ -0,0 +1,12 @@

@testset "Leanings" begin
x = [0, 0, 1, 0, 0, 1, 0, 0, 1, 0]
y = [0, 0, 0, 1, 0, 0, 1, 0, 0, 1]
@test penchant(x, y, 1, weighted = false) ≈ 1.0
@test penchant(y, x, 1, weighted = false) ≈ 1/7

@test penchant(x, y, 1, weighted = true) ≈ 1.0
@test penchant(y, x, 1, weighted = true) ≈ 3/63

@test lean(x, y, 1, weighted = true) ≈ 60/63
end
1 change: 1 addition & 0 deletions test/runtests.jl
@@ -8,6 +8,7 @@ include("methods/test_smeasure.jl")
include("methods/test_joint_distance_distribution.jl")
include("methods/test_predictive_asymmetry.jl")
include("methods/test_crossmapping.jl")
include("methods/test_leanings.jl")

include("systems/discrete/test_discrete_systems.jl")
include("systems/continuous/test_continuous_systems.jl")