Upgrades to Spark 3.4/JRE 17 and fixes all high/critical CVEs (#226)
I manually tested this on all three storage types in docker as well
(using zipkin's docker/examples instructions)

---------

Signed-off-by: Adrian Cole <[email protected]>
codefromthecrypt authored Apr 16, 2024
1 parent 9fb4a5e commit 6be9f80
Showing 25 changed files with 245 additions and 136 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/create_release.yml
@@ -23,7 +23,7 @@ jobs:
uses: actions/setup-java@v4
with:
distribution: 'zulu' # zulu as it supports a wide version range
- java-version: '11' # Most recent LTS that passes tests
+ java-version: '17' # until Spark 4 per SPARK-43831
- name: Cache local Maven repository
uses: actions/cache@v3
with:
2 changes: 1 addition & 1 deletion .github/workflows/deploy.yml
@@ -26,7 +26,7 @@ jobs:
uses: actions/setup-java@v4
with:
distribution: 'zulu' # zulu as it supports a wide version range
- java-version: '11' # Most recent LTS that passes tests
+ java-version: '17' # until Spark 4 per SPARK-43831
- name: Cache local Maven repository
uses: actions/cache@v3
with:
4 changes: 2 additions & 2 deletions .github/workflows/test.yml
@@ -31,7 +31,7 @@ jobs:
uses: actions/setup-java@v4
with:
distribution: 'zulu' # zulu as it supports a wide version range
- java-version: '11' # Most recent LTS that passes tests
+ java-version: '17' # until Spark 4 per SPARK-43831
- name: Test without Docker
run: build-bin/maven/maven_go_offline && build-bin/test -DexcludedGroups=docker
test_docker:
@@ -69,7 +69,7 @@
uses: actions/setup-java@v4
with:
distribution: 'zulu' # zulu as it supports a wide version range
- java-version: '11' # Most recent LTS that passes tests
+ java-version: '17' # until Spark 4 per SPARK-43831
- name: Test with Docker
run: |
build-bin/docker/configure_docker &&
2 changes: 1 addition & 1 deletion .github/workflows/test_readme.yml
@@ -22,7 +22,7 @@ jobs:
uses: actions/setup-java@v4
with:
distribution: 'zulu' # zulu as it supports a wide version range
- java-version: '11' # Most recent LTS that passes tests
+ java-version: '17' # until Spark 4 per SPARK-43831
- name: Cache local Maven repository
uses: actions/cache@v3
with:
2 changes: 1 addition & 1 deletion README.md
@@ -19,7 +19,7 @@ are supported, including Cassandra, MySQL and Elasticsearch.

## Quick-start

- Until [SPARK-43831](https://issues.apache.org/jira/browse/SPARK-43831), Zipkin Dependencies requires Java 11 to run.
+ Until [SPARK-43831](https://issues.apache.org/jira/browse/SPARK-43831), Zipkin Dependencies requires Java 17 to run.

The quickest way to get started is to fetch the [latest released job](https://search.maven.org/remote_content?g=io.zipkin.dependencies&a=zipkin-dependencies&v=LATEST) as a self-contained jar. For example:

10 changes: 9 additions & 1 deletion cassandra3/pom.xml
@@ -11,14 +11,15 @@
<parent>
<groupId>io.zipkin.dependencies</groupId>
<artifactId>zipkin-dependencies-parent</artifactId>
- <version>3.1.6-SNAPSHOT</version>
+ <version>3.2.0-SNAPSHOT</version>
</parent>

<artifactId>zipkin-dependencies-cassandra3</artifactId>
<name>Zipkin Dependencies: Cassandra</name>

<properties>
<main.basedir>${project.basedir}/..</main.basedir>
+ <jnr-posix.version>3.1.19</jnr-posix.version>
</properties>

<dependencies>
@@ -28,6 +29,13 @@
<version>${spark-cassandra-connector.version}</version>
</dependency>

+ <!-- avoid warning at startup -->
+ <dependency>
+   <groupId>com.github.jnr</groupId>
+   <artifactId>jnr-posix</artifactId>
+   <version>${jnr-posix.version}</version>
+ </dependency>

<!-- integration tests -->
<dependency>
<groupId>io.zipkin.zipkin2</groupId>
@@ -150,6 +150,9 @@ public CassandraDependenciesJob build() {
df.setTimeZone(TimeZone.getTimeZone("UTC"));
this.dateStamp = df.format(new Date(builder.day));
this.conf = new SparkConf(true).setMaster(builder.sparkMaster).setAppName(getClass().getName());
+ if (builder.sparkMaster.startsWith("local[")) {
+   conf.set("spark.driver.bindAddress", "127.0.0.1");
+ }
conf.set("spark.cassandra.connection.host", parseHosts(builder.contactPoints));
conf.set("spark.cassandra.connection.port", parsePort(builder.contactPoints));
conf.set("spark.cassandra.connection.localDC", builder.localDc);
@@ -232,7 +235,7 @@ static String parseHosts(String contactPoints) {
List<String> result = new ArrayList<>();
for (String contactPoint : contactPoints.split(",", -1)) {
HostAndPort parsed = HostAndPort.fromString(contactPoint);
- result.add(parsed.getHostText());
+ result.add(parsed.getHost());
}
return Joiner.on(',').join(result);
}
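Note: this tracks Guava, which deprecated and later removed HostAndPort.getHostText() in favor of getHost(); the Guava pulled in by the Spark upgrade no longer has the old name. A sketch of the replacement API, with a hypothetical contact point:

import com.google.common.net.HostAndPort;

public class HostAndPortSketch {
  public static void main(String[] args) {
    HostAndPort parsed = HostAndPort.fromString("cassandra1:9042");
    System.out.println(parsed.getHost());             // "cassandra1" (was getHostText())
    System.out.println(parsed.getPortOrDefault(9042)); // 9042
  }
}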
@@ -7,12 +7,12 @@
import com.datastax.spark.connector.japi.CassandraRow;
import com.datastax.spark.connector.japi.UDTValue;
import com.datastax.spark.connector.types.TypeConverter;
+ import java.io.Serializable;
import java.util.Map;
import javax.annotation.Nullable;
import org.apache.spark.api.java.function.Function;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
- import scala.Serializable;
import zipkin2.Endpoint;
import zipkin2.Span;

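Note on this and the similar import swaps below: Spark 3.4 builds against Scala 2.13, where scala.Serializable is deprecated (it is only an alias of java.io.Serializable), so these function classes now import the Java type directly. The pattern, sketched with a hypothetical mapper:

import java.io.Serializable;
import org.apache.spark.api.java.function.Function;

// Anything Spark ships to executors must be serializable; implementing
// java.io.Serializable directly avoids the deprecated scala.Serializable.
final class PrefixService implements Function<String, String>, Serializable {
  @Override public String call(String name) {
    return "svc-" + name;
  }
}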
@@ -5,12 +5,12 @@
package zipkin2.dependencies.cassandra3;

import com.datastax.spark.connector.japi.CassandraRow;
+ import java.io.Serializable;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.Set;
import javax.annotation.Nullable;
import org.apache.spark.api.java.function.FlatMapFunction;
- import scala.Serializable;
import zipkin2.DependencyLink;
import zipkin2.Span;

@@ -4,13 +4,13 @@
*/
package zipkin2.dependencies.cassandra3;

+ import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import javax.annotation.Nullable;
import org.apache.spark.api.java.function.FlatMapFunction;
- import scala.Serializable;
import zipkin2.DependencyLink;
import zipkin2.Span;
import zipkin2.internal.DependencyLinker;
@@ -50,7 +50,7 @@ final class CassandraContainer extends GenericContainer<CassandraContainer> {
);

CassandraContainer() {
- super(parse("ghcr.io/openzipkin/zipkin-cassandra:3.2.1"));
+ super(parse("ghcr.io/openzipkin/zipkin-cassandra:3.3.0"));
addExposedPort(9042);
waitStrategy = Wait.forHealthcheck();
withLogConsumer(new Slf4jLogConsumer(LOGGER));
10 changes: 5 additions & 5 deletions docker/Dockerfile
@@ -9,8 +9,8 @@
# This is defined in many places because Docker has no "env" script functionality unless you use
# docker-compose: When updating, update everywhere.
#
- # TODO: temporarily 11 until we can update to spark on scala 2.13
- ARG java_version=11.0.22_p7
+ # Note: JDK 17 until Spark 4 per SPARK-43831
+ ARG java_version=17.0.10_p7

# We copy files from the context into a scratch container first to avoid a problem where docker and
# docker-compose don't share layer hashes https://github.com/docker/compose/issues/883 normally.
@@ -37,9 +37,9 @@ ENV VERSION=$version
ENV MAVEN_PROJECT_BASEDIR=/code
RUN /code/build-bin/maven/maven_build_or_unjar io.zipkin.dependencies zipkin-dependencies ${VERSION}

- # Try -jre again once Spark dependencies are up to date. Spark 3.0 uses old Hadoop in the
- # Elasticsearch driver which needs com.sun.security.auth.module.UnixLoginModule.
- FROM ghcr.io/openzipkin/java:11.0.22_p7 as zipkin-dependencies
+ # Try -jre again once Spark 4.0 is released. Spark 3.x uses old Hadoop in the
+ # Elasticsearch driver which needs com.sun.security.auth.module.UnixLoginModule
+ FROM ghcr.io/openzipkin/java:17.0.10_p7 as zipkin-dependencies
LABEL org.opencontainers.image.description="Zipkin Dependencies Aggregator on OpenJDK and Alpine Linux"
LABEL org.opencontainers.image.source=https://github.com/openzipkin/zipkin-dependencies

22 changes: 21 additions & 1 deletion docker/bin/start-zipkin-dependencies
@@ -8,4 +8,24 @@
# args override it (Ex. 2020-10-31)
set -eu

- exec java ${JAVA_OPTS} -Djava.io.tmpdir=/tmp -cp classes zipkin2.dependencies.ZipkinDependenciesJob $@
+ # Spark 3.4 module config from:
+ # https://github.com/apache/spark/blob/branch-3.4/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java#L29
+ exec java ${JAVA_OPTS} -Djava.io.tmpdir=/tmp \
+   -XX:+IgnoreUnrecognizedVMOptions \
+   --add-opens=java.base/java.lang=ALL-UNNAMED \
+   --add-opens=java.base/java.lang.invoke=ALL-UNNAMED \
+   --add-opens=java.base/java.lang.reflect=ALL-UNNAMED \
+   --add-opens=java.base/java.io=ALL-UNNAMED \
+   --add-opens=java.base/java.net=ALL-UNNAMED \
+   --add-opens=java.base/java.nio=ALL-UNNAMED \
+   --add-opens=java.base/java.util=ALL-UNNAMED \
+   --add-opens=java.base/java.util.concurrent=ALL-UNNAMED \
+   --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED \
+   --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED \
+   --add-opens=java.base/sun.nio.ch=ALL-UNNAMED \
+   --add-opens=java.base/sun.nio.cs=ALL-UNNAMED \
+   --add-opens=java.base/sun.security.action=ALL-UNNAMED \
+   --add-opens=java.base/sun.util.calendar=ALL-UNNAMED \
+   --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED \
+   -Djdk.reflect.useDirectMethodHandle=false \
+   -cp classes zipkin2.dependencies.ZipkinDependenciesJob $@
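Note: the --add-opens list exists because JDK 17 strongly encapsulates the JDK internals Spark reflects into. A minimal repro of the failure mode, assuming JDK 17 (hypothetical demo class, not in this commit):

import java.lang.reflect.Field;
import java.nio.Buffer;
import java.nio.ByteBuffer;

public class AddOpensDemo {
  public static void main(String[] args) throws Exception {
    ByteBuffer buf = ByteBuffer.allocateDirect(16);
    // Spark's off-heap memory code reads this internal field reflectively.
    Field address = Buffer.class.getDeclaredField("address");
    // Throws InaccessibleObjectException on JDK 17 unless the JVM started
    // with --add-opens=java.base/java.nio=ALL-UNNAMED, as the script now passes.
    address.setAccessible(true);
    System.out.println("direct buffer address=" + address.getLong(buf));
  }
}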
2 changes: 1 addition & 1 deletion elasticsearch/pom.xml
@@ -11,7 +11,7 @@
<parent>
<groupId>io.zipkin.dependencies</groupId>
<artifactId>zipkin-dependencies-parent</artifactId>
- <version>3.1.6-SNAPSHOT</version>
+ <version>3.2.0-SNAPSHOT</version>
</parent>

<artifactId>zipkin-dependencies-elasticsearch</artifactId>
@@ -154,6 +154,9 @@ private static String getSystemPropertyAsFileResource(String key) {
df.setTimeZone(TimeZone.getTimeZone("UTC"));
this.dateStamp = df.format(new Date(builder.day));
this.conf = new SparkConf(true).setMaster(builder.sparkMaster).setAppName(getClass().getName());
+ if (builder.sparkMaster.startsWith("local[")) {
+   conf.set("spark.driver.bindAddress", "127.0.0.1");
+ }
if (builder.jars != null) conf.setJars(builder.jars);
if (builder.username != null) conf.set(ES_NET_HTTP_AUTH_USER, builder.username);
if (builder.password != null) conf.set(ES_NET_HTTP_AUTH_PASS, builder.password);
@@ -167,33 +170,27 @@
}

public void run() {
- run(
-     index + "-span-" + dateStamp,
-     index + "-dependency-" + dateStamp,
-     SpanBytesDecoder.JSON_V2);
-
- log.info("Done");
- }
+ String spanResource = index + "-span-" + dateStamp;
+ String dependencyLinkResource = index + "-dependency-" + dateStamp;
+ SpanBytesDecoder decoder = SpanBytesDecoder.JSON_V2;

- void run(String spanResource, String dependencyLinkResource, SpanBytesDecoder decoder) {
log.info("Processing spans from {}", spanResource);
- JavaSparkContext sc = new JavaSparkContext(conf);
- try {
-   JavaRDD<Map<String, Object>> links =
-       JavaEsSpark.esJsonRDD(sc, spanResource)
-       .groupBy(JSON_TRACE_ID)
-       .flatMapValues(new TraceIdAndJsonToDependencyLinks(logInitializer, decoder))
-       .values()
-       .mapToPair((PairFunction<DependencyLink, Tuple2<String, String>, DependencyLink>) l ->
-           new Tuple2<>(new Tuple2<>(l.parent(), l.child()), l))
-       .reduceByKey((l, r) -> DependencyLink.newBuilder()
-           .parent(l.parent())
-           .child(l.child())
-           .callCount(l.callCount() + r.callCount())
-           .errorCount(l.errorCount() + r.errorCount())
-           .build())
-       .values()
-       .map(DEPENDENCY_LINK_JSON);
+ JavaRDD<Map<String, Object>> links;
+ try (JavaSparkContext sc = new JavaSparkContext(conf)) {
+   links = JavaEsSpark.esJsonRDD(sc, spanResource)
+     .groupBy(JSON_TRACE_ID)
+     .flatMapValues(new TraceIdAndJsonToDependencyLinks(logInitializer, decoder))
+     .values()
+     .mapToPair((PairFunction<DependencyLink, Tuple2<String, String>, DependencyLink>) l ->
+         new Tuple2<>(new Tuple2<>(l.parent(), l.child()), l))
+     .reduceByKey((l, r) -> DependencyLink.newBuilder()
+         .parent(l.parent())
+         .child(l.child())
+         .callCount(l.callCount() + r.callCount())
+         .errorCount(l.errorCount() + r.errorCount())
+         .build())
+     .values()
+     .map(DEPENDENCY_LINK_JSON);

if (links.isEmpty()) {
log.info("No dependency links could be processed from spans in index {}", spanResource);
@@ -204,9 +201,9 @@
dependencyLinkResource,
Collections.singletonMap("es.mapping.id", "id")); // allows overwriting the link
}
- } finally {
-   sc.stop();
}

log.info("Done");
}

/**
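Note on the refactor above: JavaSparkContext implements Closeable (close() delegates to stop()), which is what lets the explicit try/finally { sc.stop(); } collapse into try-with-resources. In miniature, as a hypothetical sketch:

import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class TryWithResourcesSketch {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf(true).setMaster("local[1]").setAppName("sketch");
    // The context is stopped even if the body throws, replacing try/finally.
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
      long n = sc.parallelize(Arrays.asList(1, 2, 3)).count();
      System.out.println("count=" + n);
    }
  }
}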
@@ -4,14 +4,14 @@
*/
package zipkin2.dependencies.elasticsearch;

+ import java.io.Serializable;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import javax.annotation.Nullable;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
- import scala.Serializable;
import scala.Tuple2;
import zipkin2.DependencyLink;
import zipkin2.Span;
@@ -33,7 +33,7 @@ class ElasticsearchContainer extends GenericContainer<ElasticsearchContainer> {
static final Logger LOGGER = LoggerFactory.getLogger(ElasticsearchContainer.class);

ElasticsearchContainer(int majorVersion) {
- super(parse("ghcr.io/openzipkin/zipkin-elasticsearch" + majorVersion + ":3.2.1"));
+ super(parse("ghcr.io/openzipkin/zipkin-elasticsearch" + majorVersion + ":3.3.0"));
addExposedPort(9200);
waitStrategy = Wait.forHealthcheck();
withLogConsumer(new Slf4jLogConsumer(LOGGER));
29 changes: 5 additions & 24 deletions main/pom.xml
@@ -11,7 +11,7 @@
<parent>
<groupId>io.zipkin.dependencies</groupId>
<artifactId>zipkin-dependencies-parent</artifactId>
- <version>3.1.6-SNAPSHOT</version>
+ <version>3.2.0-SNAPSHOT</version>
</parent>

<artifactId>zipkin-dependencies</artifactId>
@@ -77,18 +77,6 @@
<include>**</include>
</includes>
</filter>
- <filter>
-   <artifact>org.apache.hadoop:hadoop-common</artifact>
-   <includes>
-     <include>**</include>
-   </includes>
- </filter>
- <filter>
-   <artifact>log4j:log4j</artifact>
-   <includes>
-     <include>**</include>
-   </includes>
- </filter>
<filter>
<artifact>io.netty:*</artifact>
<includes>
@@ -128,22 +116,15 @@
</includes>
</filter>
<filter>
- <!-- com.datastax.oss.driver.internal.core.metrics.DropwizardMetricsFactory -->
- <artifact>com.datastax.oss:java-driver-core-shaded</artifact>
+ <!-- jnr.posix.POSIXHandler -->
+ <artifact>com.github.jnr:jnr-posix</artifact>
<includes>
<include>**</include>
</includes>
</filter>
- <filter>
-   <!-- elasticsearch -->
-   <artifact>commons-httpclient:commons-httpclient</artifact>
-   <includes>
-     <include>**</include>
-   </includes>
- </filter>
<filter>
- <!-- org.apache.xerces.jaxp.DocumentBuilderFactoryImpl -->
- <artifact>xerces:xercesImpl</artifact>
+ <!-- com.datastax.oss.driver.internal.core.metrics.DropwizardMetricsFactory -->
+ <artifact>com.datastax.oss:java-driver-core-shaded</artifact>
<includes>
<include>**</include>
</includes>
19 changes: 0 additions & 19 deletions main/src/main/resources/log4j.properties

This file was deleted.
