[SEDONA-648] Implement Distributed K Nearest Neighbor Join #2805

Workflow file for this run

name: R build
on:
  push:
    branches:
      - master
    paths:
      - 'common/**'
      - 'spark/**'
      - 'spark-shaded/**'
      - 'pom.xml'
      - 'R/**'
      - '.github/workflows/r.yml'
  pull_request:
    branches:
      - '*'
    paths:
      - 'common/**'
      - 'spark/**'
      - 'spark-shaded/**'
      - 'pom.xml'
      - 'R/**'
      - '.github/workflows/r.yml'
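# MAVEN_OPTS limits the lifetime of Maven's pooled HTTP connections, a common
# workaround for stale-connection resets when downloading dependencies on CI;
# DO_NOT_TRACK asks tooling that honors that convention to disable telemetry.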
env:
  MAVEN_OPTS: -Dmaven.wagon.httpconnectionManager.ttlSeconds=60
  DO_NOT_TRACK: true
jobs:
  build:
    runs-on: ubuntu-22.04
    strategy:
      fail-fast: true
      matrix:
        spark: [3.0.3, 3.1.2, 3.2.1, 3.3.0, 3.4.0, 3.5.0]
        hadoop: [3]
        scala: [2.12.15]
        r: [oldrel, release]
    env:
      SPARK_VERSION: ${{ matrix.spark }}
      HADOOP_VERSION: ${{ matrix.hadoop }}
      SCALA_VERSION: ${{ matrix.scala }}
      # Ensure the temporary auth token for this workflow, rather than the GitHub PAT
      # bundled with the `remotes` package, is used for `remotes::install_github()`
      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
    steps:
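      # Remove every preinstalled R and Rscript from the PATH so that the interpreter
      # installed by the setup-r step below is the only one the job can pick up.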
      - name: Delete existing R binaries
        run: |
          for b in R Rscript
          do
            while [ -n "$(which "$b")" ]
            do
              sudo rm -v "$(which "$b")"
            done
          done
        shell: bash
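      # Drop the runner's preinstalled JDKs so that the Temurin JDK 8 set up later in
      # this job is the one used to build and run Sedona.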
      - name: Delete existing JDK installation(s)
        run: |
          sudo apt-get -y remove --purge default-jdk adoptopenjdk-11-hotspot || :
        shell: bash
      - uses: actions/checkout@v4
      - uses: r-lib/actions/setup-r@v2
        with:
          r-version: ${{ matrix.r }}
          use-public-rspm: true
      - name: Query R dependencies
        uses: r-lib/actions/setup-r-dependencies@v2
        with:
          cache: true
          extra-packages: |
            any::testthat
            any::rcmdcheck
          working-directory: './R'
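      # R CMD check runs with --no-tests here; the testthat suite is executed
      # separately in the "Run tests" step below, once the Sedona jars are built.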
      - name: Build and check R package
        uses: r-lib/actions/check-r-package@v2
        with:
          build_args: 'c("--no-build-vignettes", "--no-manual")'
          args: 'c("--no-build-vignettes", "--no-manual", "--no-tests")'
          error-on: '"error"'
          working-directory: './R'
        env:
          _R_CHECK_FORCE_SUGGESTS_: false
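      # Install the package from the local ./R sources so the integration tests below
      # exercise this checkout rather than a released version.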
      - name: Install apache.sedona from source
        run: Rscript -e 'install.packages("./R/", repos = NULL, type = "source")'
      - uses: actions/setup-java@v4
        with:
          distribution: 'temurin'
          java-version: '8'
          cache: 'maven'
      - name: Get OS name
        id: os-name
        run: |
          # `os_name` will be like "Ubuntu-20.04.1-LTS"
          OS_NAME=$(lsb_release -ds | sed 's/\s/-/g')
          echo "os-name=$OS_NAME" >> $GITHUB_OUTPUT
      - name: Cache Spark installations
        if: runner.os != 'Windows'
        uses: actions/cache@master
        with:
          path: ~/spark
          key: apache.sedona-apache-spark-${{ steps.os-name.outputs.os-name }}-${{ env.SPARK_VERSION }}
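      # ${SPARK_VERSION:0:3} keeps only "major.minor" (e.g. 3.5.0 -> 3.5) and
      # ${SCALA_VERSION:0:4} keeps 2.12, selecting the matching Maven build profiles.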
      - name: Build Sedona libraries
        run: |
          SPARK_COMPAT_VERSION=${SPARK_VERSION:0:3}
          mvn -q clean install -DskipTests -Dspark=${SPARK_COMPAT_VERSION} -Dscala=${SCALA_VERSION:0:4}
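      # Spark releases older than 3.3 use different Hadoop qualifiers in their
      # distribution names, so HADOOP_VERSION is remapped below (3 -> 3.2, 2 -> 2.7)
      # before the Spark distribution is resolved for the tests.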
      - name: Run tests
        run: |
          if [[ "${SPARK_VERSION:0:3}" < "3.3" ]]; then
            case "$HADOOP_VERSION" in
              3)
                export HADOOP_VERSION=3.2
                ;;
              2)
                export HADOOP_VERSION=2.7
                ;;
            esac
          fi
          export SPARKLYR_LOG_FILE='/tmp/sparklyr.log'
          source ./.github/workflows/scripts/prepare_sparklyr_sedona_test_env.sh
          echo "Apache Sedona jar files: ${SEDONA_JAR_FILES}"
          cd ./R/tests
          NOT_CRAN='true' Rscript testthat.R
        shell: bash
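      # On failure, upload the sparklyr worker log as an artifact and also print it to
      # the job output for quick inspection.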
      - uses: actions/upload-artifact@v3
        if: failure()
        with:
          name: Worker logs
          path: /tmp/sparklyr.log
      - name: Dump worker logs on failure
        if: failure()
        run: cat /tmp/sparklyr.log