[SEDONA-648] Implement Distributed K Nearest Neighbor Join #2805

Workflow file for this run

name: R build
on:
  push:
    branches:
      - master
    paths:
      - 'common/**'
      - 'spark/**'
      - 'spark-shaded/**'
      - 'pom.xml'
      - 'R/**'
      - '.github/workflows/r.yml'
  pull_request:
    branches:
      - '*'
    paths:
      - 'common/**'
      - 'spark/**'
      - 'spark-shaded/**'
      - 'pom.xml'
      - 'R/**'
      - '.github/workflows/r.yml'
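# MAVEN_OPTS limits the lifetime of Maven's pooled HTTP connections, a common
# workaround for stale-connection resets when downloading dependencies on CI;
# DO_NOT_TRACK asks tooling that honors that convention to disable telemetry.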
env:
  MAVEN_OPTS: -Dmaven.wagon.httpconnectionManager.ttlSeconds=60
  DO_NOT_TRACK: true
jobs:
  build:
    runs-on: ubuntu-22.04
    strategy:
      fail-fast: true
      matrix:
        spark: [3.0.3, 3.1.2, 3.2.1, 3.3.0, 3.4.0, 3.5.0]
        hadoop: [3]
        scala: [2.12.15]
        r: [oldrel, release]
    env:
      SPARK_VERSION: ${{ matrix.spark }}
      HADOOP_VERSION: ${{ matrix.hadoop }}
      SCALA_VERSION: ${{ matrix.scala }}
      # Ensure the temporary auth token for this workflow, rather than the GitHub PAT
      # bundled with the `remotes` package, is used for `remotes::install_github()`
      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
    steps:
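      # Remove every preinstalled R and Rscript from the PATH so that the interpreter
      # installed by the setup-r step below is the only one the job can pick up.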
      - name: Delete existing R binaries
        run: |
          for b in R Rscript
          do
            while [ -n "$(which "$b")" ]
            do
              sudo rm -v "$(which "$b")"
            done
          done
        shell: bash
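      # Drop the runner's preinstalled JDKs so that the Temurin JDK 8 set up later in
      # this job is the one used to build and run Sedona.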
      - name: Delete existing JDK installation(s)
        run: |
          sudo apt-get -y remove --purge default-jdk adoptopenjdk-11-hotspot || :
        shell: bash
      - uses: actions/checkout@v4
      - uses: r-lib/actions/setup-r@v2
        with:
          r-version: ${{ matrix.r }}
          use-public-rspm: true
      - name: Query R dependencies
        uses: r-lib/actions/setup-r-dependencies@v2
        with:
          cache: true
          extra-packages: |
            any::testthat
            any::rcmdcheck
          working-directory: './R'
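      # R CMD check runs with --no-tests here; the testthat suite is executed
      # separately in the "Run tests" step below, once the Sedona jars are built.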
      - name: Build and check R package
        uses: r-lib/actions/check-r-package@v2
        with:
          build_args: 'c("--no-build-vignettes", "--no-manual")'
          args: 'c("--no-build-vignettes", "--no-manual", "--no-tests")'
          error-on: '"error"'
          working-directory: './R'
        env:
          _R_CHECK_FORCE_SUGGESTS_: false
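      # Install the package from the local ./R sources so the integration tests below
      # exercise this checkout rather than a released version.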
      - name: Install apache.sedona from source
        run: Rscript -e 'install.packages("./R/", repos = NULL, type = "source")'
      - uses: actions/setup-java@v4
        with:
          distribution: 'temurin'
          java-version: '8'
          cache: 'maven'
      - name: Get OS name
        id: os-name
        run: |
          # `os_name` will be like "Ubuntu-20.04.1-LTS"
          OS_NAME=$(lsb_release -ds | sed 's/\s/-/g')
          echo "os-name=$OS_NAME" >> $GITHUB_OUTPUT
      - name: Cache Spark installations
        if: runner.os != 'Windows'
        uses: actions/cache@master
        with:
          path: ~/spark
          key: apache.sedona-apache-spark-${{ steps.os-name.outputs.os-name }}-${{ env.SPARK_VERSION }}
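      # ${SPARK_VERSION:0:3} keeps only "major.minor" (e.g. 3.5.0 -> 3.5) and
      # ${SCALA_VERSION:0:4} keeps 2.12, selecting the matching Maven build profiles.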
      - name: Build Sedona libraries
        run: |
          SPARK_COMPAT_VERSION=${SPARK_VERSION:0:3}
          mvn -q clean install -DskipTests -Dspark=${SPARK_COMPAT_VERSION} -Dscala=${SCALA_VERSION:0:4}
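      # Spark releases older than 3.3 use different Hadoop qualifiers in their
      # distribution names, so HADOOP_VERSION is remapped below (3 -> 3.2, 2 -> 2.7)
      # before the Spark distribution is resolved for the tests.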
      - name: Run tests
        run: |
          if [[ "${SPARK_VERSION:0:3}" < "3.3" ]]; then
            case "$HADOOP_VERSION" in
              3)
                export HADOOP_VERSION=3.2
                ;;
              2)
                export HADOOP_VERSION=2.7
                ;;
            esac
          fi
          export SPARKLYR_LOG_FILE='/tmp/sparklyr.log'
          source ./.github/workflows/scripts/prepare_sparklyr_sedona_test_env.sh
          echo "Apache Sedona jar files: ${SEDONA_JAR_FILES}"
          cd ./R/tests
          NOT_CRAN='true' Rscript testthat.R
        shell: bash
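      # On failure, upload the sparklyr worker log as an artifact and also print it to
      # the job output for quick inspection.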
      - uses: actions/upload-artifact@v3
        if: failure()
        with:
          name: Worker logs
          path: /tmp/sparklyr.log
      - name: Dump worker logs on failure
        if: failure()
        run: cat /tmp/sparklyr.log