From 6be9f80727d69105afea0e9371e2e74a8697fc3d Mon Sep 17 00:00:00 2001 From: Adrian Cole <64215+codefromthecrypt@users.noreply.github.com> Date: Tue, 16 Apr 2024 06:32:58 -1000 Subject: [PATCH] Upgrades to Spark 3.4/JRE 17 and fixes all high/critical CVEs (#226) I manually tested this on all three storage types in docker as well (using zipkin's docker/examples instructions) --------- Signed-off-by: Adrian Cole --- .github/workflows/create_release.yml | 2 +- .github/workflows/deploy.yml | 2 +- .github/workflows/test.yml | 4 +- .github/workflows/test_readme.yml | 2 +- README.md | 2 +- cassandra3/pom.xml | 10 +- .../cassandra3/CassandraDependenciesJob.java | 5 +- .../cassandra3/CassandraRowToSpan.java | 2 +- .../CassandraRowsToDependencyLinks.java | 2 +- .../cassandra3/SpansToDependencyLinks.java | 2 +- .../storage/cassandra/CassandraContainer.java | 2 +- docker/Dockerfile | 10 +- docker/bin/start-zipkin-dependencies | 22 ++- elasticsearch/pom.xml | 2 +- .../ElasticsearchDependenciesJob.java | 51 +++--- .../TraceIdAndJsonToDependencyLinks.java | 2 +- .../elasticsearch/ElasticsearchContainer.java | 2 +- main/pom.xml | 29 +--- main/src/main/resources/log4j.properties | 19 --- main/src/main/resources/log4j2.properties | 14 ++ mysql/pom.xml | 2 +- .../mysql/MySQLDependenciesJob.java | 28 ++-- .../mysql/RowsToDependencyLinks.java | 6 +- .../storage/mysql/v1/MySQLContainer.java | 6 +- pom.xml | 153 +++++++++++++++--- 25 files changed, 245 insertions(+), 136 deletions(-) delete mode 100644 main/src/main/resources/log4j.properties create mode 100644 main/src/main/resources/log4j2.properties diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index 371db27..dd82318 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -23,7 +23,7 @@ jobs: uses: actions/setup-java@v4 with: distribution: 'zulu' # zulu as it supports a wide version range - java-version: '11' # Most recent LTS that passes tests + java-version: '17' # until Spark 4 per SPARK-43831 - name: Cache local Maven repository uses: actions/cache@v3 with: diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index bef38da..7c8ba79 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -26,7 +26,7 @@ jobs: uses: actions/setup-java@v4 with: distribution: 'zulu' # zulu as it supports a wide version range - java-version: '11' # Most recent LTS that passes tests + java-version: '17' # until Spark 4 per SPARK-43831 - name: Cache local Maven repository uses: actions/cache@v3 with: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9abde7e..7761d46 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -31,7 +31,7 @@ jobs: uses: actions/setup-java@v4 with: distribution: 'zulu' # zulu as it supports a wide version range - java-version: '11' # Most recent LTS that passes tests + java-version: '17' # until Spark 4 per SPARK-43831 - name: Test without Docker run: build-bin/maven/maven_go_offline && build-bin/test -DexcludedGroups=docker test_docker: @@ -69,7 +69,7 @@ jobs: uses: actions/setup-java@v4 with: distribution: 'zulu' # zulu as it supports a wide version range - java-version: '11' # Most recent LTS that passes tests + java-version: '17' # until Spark 4 per SPARK-43831 - name: Test with Docker run: | build-bin/docker/configure_docker && diff --git a/.github/workflows/test_readme.yml b/.github/workflows/test_readme.yml index 5543df7..626ab9a 100644 --- a/.github/workflows/test_readme.yml 
+++ b/.github/workflows/test_readme.yml @@ -22,7 +22,7 @@ jobs: uses: actions/setup-java@v4 with: distribution: 'zulu' # zulu as it supports a wide version range - java-version: '11' # Most recent LTS that passes tests + java-version: '17' # until Spark 4 per SPARK-43831 - name: Cache local Maven repository uses: actions/cache@v3 with: diff --git a/README.md b/README.md index aeff0e4..e296e55 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ are supported, including Cassandra, MySQL and Elasticsearch. ## Quick-start -Until [SPARK-43831](https://issues.apache.org/jira/browse/SPARK-43831), Zipkin Dependencies requires Java 11 to run. +Until [SPARK-43831](https://issues.apache.org/jira/browse/SPARK-43831), Zipkin Dependencies requires Java 17 to run. The quickest way to get started is to fetch the [latest released job](https://search.maven.org/remote_content?g=io.zipkin.dependencies&a=zipkin-dependencies&v=LATEST) as a self-contained jar. For example: diff --git a/cassandra3/pom.xml b/cassandra3/pom.xml index 1c9db1e..00375c3 100644 --- a/cassandra3/pom.xml +++ b/cassandra3/pom.xml @@ -11,7 +11,7 @@ io.zipkin.dependencies zipkin-dependencies-parent - 3.1.6-SNAPSHOT + 3.2.0-SNAPSHOT zipkin-dependencies-cassandra3 @@ -19,6 +19,7 @@ ${project.basedir}/.. + 3.1.19 @@ -28,6 +29,13 @@ ${spark-cassandra-connector.version} + + + com.github.jnr + jnr-posix + ${jnr-posix.version} + + io.zipkin.zipkin2 diff --git a/cassandra3/src/main/java/zipkin2/dependencies/cassandra3/CassandraDependenciesJob.java b/cassandra3/src/main/java/zipkin2/dependencies/cassandra3/CassandraDependenciesJob.java index 252fcfc..a8161d8 100644 --- a/cassandra3/src/main/java/zipkin2/dependencies/cassandra3/CassandraDependenciesJob.java +++ b/cassandra3/src/main/java/zipkin2/dependencies/cassandra3/CassandraDependenciesJob.java @@ -150,6 +150,9 @@ public CassandraDependenciesJob build() { df.setTimeZone(TimeZone.getTimeZone("UTC")); this.dateStamp = df.format(new Date(builder.day)); this.conf = new SparkConf(true).setMaster(builder.sparkMaster).setAppName(getClass().getName()); + if (builder.sparkMaster.startsWith("local[")) { + conf.set("spark.driver.bindAddress", "127.0.0.1"); + } conf.set("spark.cassandra.connection.host", parseHosts(builder.contactPoints)); conf.set("spark.cassandra.connection.port", parsePort(builder.contactPoints)); conf.set("spark.cassandra.connection.localDC", builder.localDc); @@ -232,7 +235,7 @@ static String parseHosts(String contactPoints) { List result = new ArrayList<>(); for (String contactPoint : contactPoints.split(",", -1)) { HostAndPort parsed = HostAndPort.fromString(contactPoint); - result.add(parsed.getHostText()); + result.add(parsed.getHost()); } return Joiner.on(',').join(result); } diff --git a/cassandra3/src/main/java/zipkin2/dependencies/cassandra3/CassandraRowToSpan.java b/cassandra3/src/main/java/zipkin2/dependencies/cassandra3/CassandraRowToSpan.java index c97740a..0095741 100644 --- a/cassandra3/src/main/java/zipkin2/dependencies/cassandra3/CassandraRowToSpan.java +++ b/cassandra3/src/main/java/zipkin2/dependencies/cassandra3/CassandraRowToSpan.java @@ -7,12 +7,12 @@ import com.datastax.spark.connector.japi.CassandraRow; import com.datastax.spark.connector.japi.UDTValue; import com.datastax.spark.connector.types.TypeConverter; +import java.io.Serializable; import java.util.Map; import javax.annotation.Nullable; import org.apache.spark.api.java.function.Function; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import scala.Serializable; import zipkin2.Endpoint; import 
zipkin2.Span; diff --git a/cassandra3/src/main/java/zipkin2/dependencies/cassandra3/CassandraRowsToDependencyLinks.java b/cassandra3/src/main/java/zipkin2/dependencies/cassandra3/CassandraRowsToDependencyLinks.java index b1a5ade..82add62 100644 --- a/cassandra3/src/main/java/zipkin2/dependencies/cassandra3/CassandraRowsToDependencyLinks.java +++ b/cassandra3/src/main/java/zipkin2/dependencies/cassandra3/CassandraRowsToDependencyLinks.java @@ -5,12 +5,12 @@ package zipkin2.dependencies.cassandra3; import com.datastax.spark.connector.japi.CassandraRow; +import java.io.Serializable; import java.util.Iterator; import java.util.LinkedHashSet; import java.util.Set; import javax.annotation.Nullable; import org.apache.spark.api.java.function.FlatMapFunction; -import scala.Serializable; import zipkin2.DependencyLink; import zipkin2.Span; diff --git a/cassandra3/src/main/java/zipkin2/dependencies/cassandra3/SpansToDependencyLinks.java b/cassandra3/src/main/java/zipkin2/dependencies/cassandra3/SpansToDependencyLinks.java index 820c722..4ccd2ac 100644 --- a/cassandra3/src/main/java/zipkin2/dependencies/cassandra3/SpansToDependencyLinks.java +++ b/cassandra3/src/main/java/zipkin2/dependencies/cassandra3/SpansToDependencyLinks.java @@ -4,13 +4,13 @@ */ package zipkin2.dependencies.cassandra3; +import java.io.Serializable; import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; import java.util.List; import javax.annotation.Nullable; import org.apache.spark.api.java.function.FlatMapFunction; -import scala.Serializable; import zipkin2.DependencyLink; import zipkin2.Span; import zipkin2.internal.DependencyLinker; diff --git a/cassandra3/src/test/java/zipkin2/storage/cassandra/CassandraContainer.java b/cassandra3/src/test/java/zipkin2/storage/cassandra/CassandraContainer.java index 2619661..3ea38c2 100644 --- a/cassandra3/src/test/java/zipkin2/storage/cassandra/CassandraContainer.java +++ b/cassandra3/src/test/java/zipkin2/storage/cassandra/CassandraContainer.java @@ -50,7 +50,7 @@ final class CassandraContainer extends GenericContainer { ); CassandraContainer() { - super(parse("ghcr.io/openzipkin/zipkin-cassandra:3.2.1")); + super(parse("ghcr.io/openzipkin/zipkin-cassandra:3.3.0")); addExposedPort(9042); waitStrategy = Wait.forHealthcheck(); withLogConsumer(new Slf4jLogConsumer(LOGGER)); diff --git a/docker/Dockerfile b/docker/Dockerfile index 39ce48a..725ebd2 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -9,8 +9,8 @@ # This is defined in many places because Docker has no "env" script functionality unless you use # docker-compose: When updating, update everywhere. # -# TODO: temporarily 11 until we can update to spark on scala 2.13 -ARG java_version=11.0.22_p7 +# Note: JDK 17 until Spark 4 per SPARK-43831 +ARG java_version=17.0.10_p7 # We copy files from the context into a scratch container first to avoid a problem where docker and # docker-compose don't share layer hashes https://github.com/docker/compose/issues/883 normally. @@ -37,9 +37,9 @@ ENV VERSION=$version ENV MAVEN_PROJECT_BASEDIR=/code RUN /code/build-bin/maven/maven_build_or_unjar io.zipkin.dependencies zipkin-dependencies ${VERSION} -# Try -jre again once Spark dependencies are up to date. Spark 3.0 uses old Hadoop in the -# Elasticsearch driver which needs com.sun.security.auth.module.UnixLoginModule. -FROM ghcr.io/openzipkin/java:11.0.22_p7 as zipkin-dependencies +# Try -jre again once Spark 4.0 is released. 
Spark 3.x uses old Hadoop in the +# Elasticsearch driver which needs com.sun.security.auth.module.UnixLoginModule +FROM ghcr.io/openzipkin/java:17.0.10_p7 as zipkin-dependencies LABEL org.opencontainers.image.description="Zipkin Dependencies Aggregator on OpenJDK and Alpine Linux" LABEL org.opencontainers.image.source=https://github.com/openzipkin/zipkin-dependencies diff --git a/docker/bin/start-zipkin-dependencies b/docker/bin/start-zipkin-dependencies index 9323968..e6c4b3f 100755 --- a/docker/bin/start-zipkin-dependencies +++ b/docker/bin/start-zipkin-dependencies @@ -8,4 +8,24 @@ # args override it (Ex. 2020-10-31) set -eu -exec java ${JAVA_OPTS} -Djava.io.tmpdir=/tmp -cp classes zipkin2.dependencies.ZipkinDependenciesJob $@ +# Spark 3.4 module config from: +# https://github.com/apache/spark/blob/branch-3.4/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java#L29 +exec java ${JAVA_OPTS} -Djava.io.tmpdir=/tmp \ + -XX:+IgnoreUnrecognizedVMOptions \ + --add-opens=java.base/java.lang=ALL-UNNAMED \ + --add-opens=java.base/java.lang.invoke=ALL-UNNAMED \ + --add-opens=java.base/java.lang.reflect=ALL-UNNAMED \ + --add-opens=java.base/java.io=ALL-UNNAMED \ + --add-opens=java.base/java.net=ALL-UNNAMED \ + --add-opens=java.base/java.nio=ALL-UNNAMED \ + --add-opens=java.base/java.util=ALL-UNNAMED \ + --add-opens=java.base/java.util.concurrent=ALL-UNNAMED \ + --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED \ + --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED \ + --add-opens=java.base/sun.nio.ch=ALL-UNNAMED \ + --add-opens=java.base/sun.nio.cs=ALL-UNNAMED \ + --add-opens=java.base/sun.security.action=ALL-UNNAMED \ + --add-opens=java.base/sun.util.calendar=ALL-UNNAMED \ + --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED \ + -Djdk.reflect.useDirectMethodHandle=false \ + -cp classes zipkin2.dependencies.ZipkinDependenciesJob $@ diff --git a/elasticsearch/pom.xml b/elasticsearch/pom.xml index e84c16c..6a92241 100644 --- a/elasticsearch/pom.xml +++ b/elasticsearch/pom.xml @@ -11,7 +11,7 @@ io.zipkin.dependencies zipkin-dependencies-parent - 3.1.6-SNAPSHOT + 3.2.0-SNAPSHOT zipkin-dependencies-elasticsearch diff --git a/elasticsearch/src/main/java/zipkin2/dependencies/elasticsearch/ElasticsearchDependenciesJob.java b/elasticsearch/src/main/java/zipkin2/dependencies/elasticsearch/ElasticsearchDependenciesJob.java index 264617e..4cfd635 100644 --- a/elasticsearch/src/main/java/zipkin2/dependencies/elasticsearch/ElasticsearchDependenciesJob.java +++ b/elasticsearch/src/main/java/zipkin2/dependencies/elasticsearch/ElasticsearchDependenciesJob.java @@ -154,6 +154,9 @@ private static String getSystemPropertyAsFileResource(String key) { df.setTimeZone(TimeZone.getTimeZone("UTC")); this.dateStamp = df.format(new Date(builder.day)); this.conf = new SparkConf(true).setMaster(builder.sparkMaster).setAppName(getClass().getName()); + if (builder.sparkMaster.startsWith("local[")) { + conf.set("spark.driver.bindAddress", "127.0.0.1"); + } if (builder.jars != null) conf.setJars(builder.jars); if (builder.username != null) conf.set(ES_NET_HTTP_AUTH_USER, builder.username); if (builder.password != null) conf.set(ES_NET_HTTP_AUTH_PASS, builder.password); @@ -167,33 +170,27 @@ private static String getSystemPropertyAsFileResource(String key) { } public void run() { - run( - index + "-span-" + dateStamp, - index + "-dependency-" + dateStamp, - SpanBytesDecoder.JSON_V2); - - log.info("Done"); - } + String spanResource = index + "-span-" + dateStamp; + String dependencyLinkResource 
= index + "-dependency-" + dateStamp; + SpanBytesDecoder decoder = SpanBytesDecoder.JSON_V2; - void run(String spanResource, String dependencyLinkResource, SpanBytesDecoder decoder) { log.info("Processing spans from {}", spanResource); - JavaSparkContext sc = new JavaSparkContext(conf); - try { - JavaRDD> links = - JavaEsSpark.esJsonRDD(sc, spanResource) - .groupBy(JSON_TRACE_ID) - .flatMapValues(new TraceIdAndJsonToDependencyLinks(logInitializer, decoder)) - .values() - .mapToPair((PairFunction, DependencyLink>) l -> - new Tuple2<>(new Tuple2<>(l.parent(), l.child()), l)) - .reduceByKey((l, r) -> DependencyLink.newBuilder() - .parent(l.parent()) - .child(l.child()) - .callCount(l.callCount() + r.callCount()) - .errorCount(l.errorCount() + r.errorCount()) - .build()) - .values() - .map(DEPENDENCY_LINK_JSON); + JavaRDD> links; + try (JavaSparkContext sc = new JavaSparkContext(conf)) { + links = JavaEsSpark.esJsonRDD(sc, spanResource) + .groupBy(JSON_TRACE_ID) + .flatMapValues(new TraceIdAndJsonToDependencyLinks(logInitializer, decoder)) + .values() + .mapToPair((PairFunction, DependencyLink>) l -> + new Tuple2<>(new Tuple2<>(l.parent(), l.child()), l)) + .reduceByKey((l, r) -> DependencyLink.newBuilder() + .parent(l.parent()) + .child(l.child()) + .callCount(l.callCount() + r.callCount()) + .errorCount(l.errorCount() + r.errorCount()) + .build()) + .values() + .map(DEPENDENCY_LINK_JSON); if (links.isEmpty()) { log.info("No dependency links could be processed from spans in index {}", spanResource); @@ -204,9 +201,9 @@ void run(String spanResource, String dependencyLinkResource, SpanBytesDecoder de dependencyLinkResource, Collections.singletonMap("es.mapping.id", "id")); // allows overwriting the link } - } finally { - sc.stop(); } + + log.info("Done"); } /** diff --git a/elasticsearch/src/main/java/zipkin2/dependencies/elasticsearch/TraceIdAndJsonToDependencyLinks.java b/elasticsearch/src/main/java/zipkin2/dependencies/elasticsearch/TraceIdAndJsonToDependencyLinks.java index b8c95ab..78fe8da 100644 --- a/elasticsearch/src/main/java/zipkin2/dependencies/elasticsearch/TraceIdAndJsonToDependencyLinks.java +++ b/elasticsearch/src/main/java/zipkin2/dependencies/elasticsearch/TraceIdAndJsonToDependencyLinks.java @@ -4,6 +4,7 @@ */ package zipkin2.dependencies.elasticsearch; +import java.io.Serializable; import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -11,7 +12,6 @@ import org.apache.spark.api.java.function.FlatMapFunction; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import scala.Serializable; import scala.Tuple2; import zipkin2.DependencyLink; import zipkin2.Span; diff --git a/elasticsearch/src/test/java/zipkin2/storage/elasticsearch/ElasticsearchContainer.java b/elasticsearch/src/test/java/zipkin2/storage/elasticsearch/ElasticsearchContainer.java index e65a3bc..1f8dabd 100644 --- a/elasticsearch/src/test/java/zipkin2/storage/elasticsearch/ElasticsearchContainer.java +++ b/elasticsearch/src/test/java/zipkin2/storage/elasticsearch/ElasticsearchContainer.java @@ -33,7 +33,7 @@ class ElasticsearchContainer extends GenericContainer { static final Logger LOGGER = LoggerFactory.getLogger(ElasticsearchContainer.class); ElasticsearchContainer(int majorVersion) { - super(parse("ghcr.io/openzipkin/zipkin-elasticsearch" + majorVersion + ":3.2.1")); + super(parse("ghcr.io/openzipkin/zipkin-elasticsearch" + majorVersion + ":3.3.0")); addExposedPort(9200); waitStrategy = Wait.forHealthcheck(); withLogConsumer(new Slf4jLogConsumer(LOGGER)); diff --git a/main/pom.xml 
b/main/pom.xml index e7f38ee..c61b8a6 100644 --- a/main/pom.xml +++ b/main/pom.xml @@ -11,7 +11,7 @@ io.zipkin.dependencies zipkin-dependencies-parent - 3.1.6-SNAPSHOT + 3.2.0-SNAPSHOT zipkin-dependencies @@ -77,18 +77,6 @@ ** - - org.apache.hadoop:hadoop-common - - ** - - - - log4j:log4j - - ** - - io.netty:* @@ -128,22 +116,15 @@ - - com.datastax.oss:java-driver-core-shaded + + com.github.jnr:jnr-posix ** - - commons-httpclient:commons-httpclient - - ** - - - - - xerces:xercesImpl + + com.datastax.oss:java-driver-core-shaded ** diff --git a/main/src/main/resources/log4j.properties b/main/src/main/resources/log4j.properties deleted file mode 100644 index fe2a90c..0000000 --- a/main/src/main/resources/log4j.properties +++ /dev/null @@ -1,19 +0,0 @@ -# Set everything to be logged to the console -log4j.rootCategory=WARN, console -log4j.appender.console=org.apache.log4j.ConsoleAppender -log4j.appender.console.target=System.err -log4j.appender.console.layout=org.apache.log4j.PatternLayout -log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n - -# Settings to quiet third party logs that are too verbose -log4j.logger.org.spark-project.jetty=WARN -log4j.logger.org.spark-project.jetty.util.component.AbstractLifeCycle=ERROR -log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO -log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO - -# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support -log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=FATAL -log4j.logger.org.apache.hadoop.hive.ql.exec.FunctionRegistry=ERROR - -# java.lang.NoClassDefFoundError: jnr/posix/POSIXHandler -log4j.logger.com.datastax.oss.driver.internal.core.os.Native=WARN diff --git a/main/src/main/resources/log4j2.properties b/main/src/main/resources/log4j2.properties new file mode 100644 index 0000000..891db76 --- /dev/null +++ b/main/src/main/resources/log4j2.properties @@ -0,0 +1,14 @@ +# Set everything to be logged to the console +appenders=console +appender.console.type=Console +appender.console.name=STDOUT +appender.console.layout.type=PatternLayout +appender.console.layout.pattern=%d{ABSOLUTE} %-5p [%t] %C{2} (%F:%L) - %m%n + +rootLogger.level=warn +rootLogger.appenderRefs=stdout +rootLogger.appenderRef.stdout.ref=STDOUT + +# Make sure basic status is logged for all backends +logger.zipkin2.name = zipkin2.dependencies +logger.zipkin2.level = info diff --git a/mysql/pom.xml b/mysql/pom.xml index e6edec5..4bceac5 100644 --- a/mysql/pom.xml +++ b/mysql/pom.xml @@ -11,7 +11,7 @@ io.zipkin.dependencies zipkin-dependencies-parent - 3.1.6-SNAPSHOT + 3.2.0-SNAPSHOT zipkin-dependencies-mysql diff --git a/mysql/src/main/java/zipkin2/dependencies/mysql/MySQLDependenciesJob.java b/mysql/src/main/java/zipkin2/dependencies/mysql/MySQLDependenciesJob.java index 495c8d2..f769cab 100644 --- a/mysql/src/main/java/zipkin2/dependencies/mysql/MySQLDependenciesJob.java +++ b/mysql/src/main/java/zipkin2/dependencies/mysql/MySQLDependenciesJob.java @@ -158,6 +158,9 @@ public MySQLDependenciesJob build() { this.conf = new SparkConf(true) .setMaster(builder.sparkMaster) .setAppName(getClass().getName()); + if (builder.sparkMaster.startsWith("local[")) { + conf.set("spark.driver.bindAddress", "127.0.0.1"); + } if (builder.jars != null) conf.setJars(builder.jars); for (Map.Entry entry : builder.sparkProperties.entrySet()) { conf.set(entry.getKey(), entry.getValue()); @@ -182,22 +185,24 @@ public void run() { String fields = 
"s.trace_id, s.parent_id, s.id, a.a_key, a.endpoint_service_name, a.a_type"; if (hasTraceIdHigh) fields = "s.trace_id_high, " + fields; String groupByFields = fields.replace("s.parent_id, ", ""); - String linksQuery = String.format( - "select distinct %s "+ - "from zipkin_spans s left outer join zipkin_annotations a on " + - " (s.trace_id = a.trace_id and s.id = a.span_id " + - " and a.a_key in ('lc', 'ca', 'cs', 'sa', 'sr', 'ma', 'ms', 'mr', 'error')) " + - "where s.start_ts between %s and %s group by %s", - fields, microsLower, microsUpper, groupByFields); + String linksQuery = ( + """ + select distinct %s \ + from zipkin_spans s left outer join zipkin_annotations a on \ + (s.trace_id = a.trace_id and s.id = a.span_id \ + and a.a_key in ('lc', 'ca', 'cs', 'sa', 'sr', 'ma', 'ms', 'mr', 'error')) \ + where s.start_ts between %s and %s group by %s\ + """).formatted( + fields, microsLower, microsUpper, groupByFields); options.put("dbtable", "(" + linksQuery + ") as link_spans"); log.info("Running Dependencies job for {}: start_ts between {} and {}", dateStamp, microsLower, microsUpper); - JavaSparkContext sc = new JavaSparkContext(conf); - - List links = new SQLContext(sc).read() + List links; + try (JavaSparkContext sc = new JavaSparkContext(conf)) { + links = new SQLContext(sc).read() .format("org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider") .options(options) .load() @@ -213,8 +218,7 @@ public void run() { .errorCount(l.errorCount() + r.errorCount()) .build()) .values().collect(); - - sc.stop(); + } log.info("Saving with day=" + dateStamp); saveToMySQL(links); diff --git a/mysql/src/main/java/zipkin2/dependencies/mysql/RowsToDependencyLinks.java b/mysql/src/main/java/zipkin2/dependencies/mysql/RowsToDependencyLinks.java index 6caadc1..0278f16 100644 --- a/mysql/src/main/java/zipkin2/dependencies/mysql/RowsToDependencyLinks.java +++ b/mysql/src/main/java/zipkin2/dependencies/mysql/RowsToDependencyLinks.java @@ -4,6 +4,7 @@ */ package zipkin2.dependencies.mysql; +import java.io.Serializable; import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; @@ -11,13 +12,12 @@ import javax.annotation.Nullable; import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.sql.Row; -import scala.Serializable; import zipkin2.DependencyLink; import zipkin2.Span; import zipkin2.internal.DependencyLinker; final class RowsToDependencyLinks - implements Serializable, FlatMapFunction, DependencyLink> { + implements Serializable, FlatMapFunction, DependencyLink> { private static final long serialVersionUID = 0L; @Nullable final Runnable logInitializer; @@ -31,7 +31,7 @@ final class RowsToDependencyLinks @Override public Iterator call(Iterable rows) { if (logInitializer != null) logInitializer.run(); Iterator> traces = - new DependencyLinkSpanIterator.ByTraceId(rows.iterator(), hasTraceIdHigh); + new DependencyLinkSpanIterator.ByTraceId(rows.iterator(), hasTraceIdHigh); if (!traces.hasNext()) return Collections.emptyIterator(); diff --git a/mysql/src/test/java/zipkin2/storage/mysql/v1/MySQLContainer.java b/mysql/src/test/java/zipkin2/storage/mysql/v1/MySQLContainer.java index 8965052..43573f6 100644 --- a/mysql/src/test/java/zipkin2/storage/mysql/v1/MySQLContainer.java +++ b/mysql/src/test/java/zipkin2/storage/mysql/v1/MySQLContainer.java @@ -24,7 +24,7 @@ final class MySQLContainer extends GenericContainer { static final Logger LOGGER = LoggerFactory.getLogger(MySQLContainer.class); MySQLContainer() { - 
super(parse("ghcr.io/openzipkin/zipkin-mysql:3.2.1")); + super(parse("ghcr.io/openzipkin/zipkin-mysql:3.3.0")); addExposedPort(3306); waitStrategy = Wait.forHealthcheck(); withLogConsumer(new Slf4jLogConsumer(LOGGER)); @@ -39,8 +39,8 @@ MySQLStorage.Builder newStorageBuilder() { final MariaDbDataSource dataSource; try { - dataSource = new MariaDbDataSource(String.format( - "jdbc:mysql://%s:%s/zipkin?permitMysqlScheme&autoReconnect=true&useUnicode=yes&characterEncoding=UTF-8", + dataSource = new MariaDbDataSource( + "jdbc:mysql://%s:%s/zipkin?permitMysqlScheme&autoReconnect=true&useUnicode=yes&characterEncoding=UTF-8".formatted( host(), port())); dataSource.setUser("zipkin"); dataSource.setPassword("zipkin"); diff --git a/pom.xml b/pom.xml index 60d1ffc..96c80ea 100755 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ io.zipkin.dependencies zipkin-dependencies-parent - 3.1.6-SNAPSHOT + 3.2.0-SNAPSHOT pom @@ -28,41 +28,54 @@ ${project.basedir} - - 11 - 11 + + 17 + 17 - 11 + 17 - 11 - 11 + 17 + 17 - 11 + 17 + + + + -XX:+IgnoreUnrecognizedVMOptions + --add-opens=java.base/java.lang=ALL-UNNAMED + --add-opens=java.base/java.lang.invoke=ALL-UNNAMED + --add-opens=java.base/java.lang.reflect=ALL-UNNAMED + --add-opens=java.base/java.io=ALL-UNNAMED + --add-opens=java.base/java.net=ALL-UNNAMED + --add-opens=java.base/java.nio=ALL-UNNAMED + --add-opens=java.base/java.util=ALL-UNNAMED + --add-opens=java.base/java.util.concurrent=ALL-UNNAMED + --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED + --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED + --add-opens=java.base/sun.nio.ch=ALL-UNNAMED + --add-opens=java.base/sun.nio.cs=ALL-UNNAMED + --add-opens=java.base/sun.security.action=ALL-UNNAMED + --add-opens=java.base/sun.util.calendar=ALL-UNNAMED + --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED + -Djdk.reflect.useDirectMethodHandle=false + 2.26.1 - - 2.12 - - 3.3.4 + + 2.13 + 3.4.2 - 8.13.2 + 8.14.0-SNAPSHOT - 3.3.0 + 3.4.1 4.17.0 @@ -70,12 +83,22 @@ 3.3.3 + + 1.11.3 + 1.26.1 + 32.1.3-jre + 2.5.2 + 3.25.1 + 2.13.13 + 1.1.10.5 + 3.9.2 + 5.10.2 1.19.7 - 3.2.1 + 3.3.0 1.28.0 4.1.108.Final @@ -156,6 +179,20 @@ https://github.com/openzipkin/zipkin-dependencies/issues + + + + sonatype-snapshots + https://oss.sonatype.org/content/repositories/snapshots + + false + + + true + + + + @@ -187,6 +224,53 @@ pom import + + + + org.apache.avro + avro + ${avro.version} + + + org.apache.commons + commons-compress + ${commons-compress.version} + + + com.google.guava + guava + ${guava.version} + + + org.apache.ivy + ivy + ${ivy.version} + + + com.google.protobuf + protobuf-java + ${protobuf.version} + + + org.scala-lang + scala-library + ${scala.version} + + + org.scala-lang + scala-reflect + ${scala.version} + + + org.xerial.snappy + snappy-java + ${snappy-java.version} + + + org.apache.zookeeper + zookeeper + ${zookeeper.version} + @@ -329,6 +413,9 @@ maven-surefire-plugin ${maven-surefire-plugin.version} + + ${maven.test.argLine} + @@ -345,6 +432,7 @@ + ${maven.test.argLine} false @@ -368,7 +456,8 @@ - [11,12) + + [17,18) @@ -465,10 +554,11 @@ - error-prone-11+ + error-prone-17+ - [11,12) + + [17,18) @@ -493,6 +583,17 @@ -XDcompilePolicy=simple -Xplugin:ErrorProne ${errorprone.args} + + -J--add-exports=jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED + -J--add-exports=jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED + -J--add-exports=jdk.compiler/com.sun.tools.javac.main=ALL-UNNAMED + -J--add-exports=jdk.compiler/com.sun.tools.javac.model=ALL-UNNAMED + 
-J--add-exports=jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED + -J--add-exports=jdk.compiler/com.sun.tools.javac.processing=ALL-UNNAMED + -J--add-exports=jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED + -J--add-exports=jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED + -J--add-opens=jdk.compiler/com.sun.tools.javac.code=ALL-UNNAMED + -J--add-opens=jdk.compiler/com.sun.tools.javac.comp=ALL-UNNAMED
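A note on the `spark.driver.bindAddress` hunk that repeats in all three jobs (Cassandra, Elasticsearch, MySQL): when the configured master is in-process (`local[...]`), the driver is pinned to loopback so Spark does not try to bind or advertise a routable interface, which can fail inside containers or on hosts whose hostname does not resolve. A minimal sketch of the guard; the app name and the standalone `main` wrapper are illustrative, not from the patch:

```java
import org.apache.spark.SparkConf;

public class LocalBindAddress {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf(true)
        .setMaster("local[*]")
        .setAppName("bind-address-demo"); // illustrative name
    // Same guard each job's builder gains in this patch: only force loopback
    // for in-process masters; a real cluster master must stay reachable.
    if (conf.get("spark.master").startsWith("local[")) {
      conf.set("spark.driver.bindAddress", "127.0.0.1");
    }
    System.out.println(conf.get("spark.driver.bindAddress"));
  }
}
```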
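The one-word change in `CassandraDependenciesJob.parseHosts` (`getHostText()` to `getHost()`) follows the Guava pin in the parent pom: Guava deprecated and later removed `HostAndPort.getHostText`, and `getHost` is its direct replacement. Illustrative usage; the host string is made up:

```java
import com.google.common.net.HostAndPort;

public class ParseHostsDemo {
  public static void main(String[] args) {
    // Newer Guava releases removed getHostText(); getHost() replaces it,
    // which is why the Guava bump forces this rename.
    HostAndPort parsed = HostAndPort.fromString("cassandra1:9042");
    System.out.println(parsed.getHost()); // cassandra1
    System.out.println(parsed.getPort()); // 9042
  }
}
```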
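Several files swap `import scala.Serializable` for `import java.io.Serializable`. `scala.Serializable` was only ever an alias for the `java.io` type and is deprecated in Scala 2.13, which this patch moves to alongside Spark 3.4. The shape of each changed class, reduced to a sketch (the class name and body are illustrative):

```java
import java.io.Serializable;
import java.util.Arrays;
import java.util.Iterator;
import org.apache.spark.api.java.function.FlatMapFunction;

// Spark ships function objects to executors, so they must be serializable;
// implementing java.io.Serializable directly drops the deprecated scala alias.
final class SplitWords implements Serializable, FlatMapFunction<String, String> {
  private static final long serialVersionUID = 0L;

  @Override public Iterator<String> call(String line) {
    return Arrays.asList(line.split(" ")).iterator();
  }
}
```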
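The long `--add-opens` list, added both to `docker/bin/start-zipkin-dependencies` and to `maven.test.argLine` in the parent pom, mirrors Spark 3.4's own `JavaModuleOptions` as the script comment notes. JDK 17 strongly encapsulates JDK internals (JEP 403), and Spark still reaches into them reflectively. A probe showing the failure mode the flags prevent; the field access here is illustrative of what off-heap code like Spark's does, not a call Spark makes verbatim:

```java
import java.lang.reflect.Field;
import java.nio.Buffer;
import java.nio.ByteBuffer;

public class AddOpensProbe {
  public static void main(String[] args) throws Exception {
    ByteBuffer direct = ByteBuffer.allocateDirect(16);
    Field address = Buffer.class.getDeclaredField("address");
    // On JDK 17 this throws InaccessibleObjectException unless the JVM was
    // started with --add-opens=java.base/java.nio=ALL-UNNAMED (one of the
    // flags in the list above).
    address.setAccessible(true);
    System.out.println("direct buffer address: " + address.getLong(direct));
  }
}
```

Running `java AddOpensProbe` shows the exception; `java --add-opens=java.base/java.nio=ALL-UNNAMED AddOpensProbe` passes. The leading `-XX:+IgnoreUnrecognizedVMOptions` in the patch keeps the same command line usable on JVMs that lack one of the named modules.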
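In both the Elasticsearch and MySQL jobs, the old `try { ... } finally { sc.stop(); }` around the Spark work collapses to try-with-resources: `JavaSparkContext` implements `java.io.Closeable`, and its `close()` delegates to `stop()`. A sketch under that assumption; the counting job is illustrative:

```java
import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class TryWithResourcesDemo {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf(true).setMaster("local[*]").setAppName("demo");
    conf.set("spark.driver.bindAddress", "127.0.0.1");
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
      long n = sc.parallelize(Arrays.asList(1, 2, 3)).count();
      System.out.println(n); // 3
    } // sc.stop() runs here even if the job throws
  }
}
```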
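The Log4j 1.x `log4j.properties` is deleted and replaced by `log4j2.properties` because Spark 3.3+ logs through Log4j 2, which ignores the old file and reads the new property syntax (`appender.console.*`, `rootLogger.*`, `logger.zipkin2.*`). A quick sanity check against that config, assuming `log4j-api` on the classpath; the class name is illustrative:

```java
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

public class LogCheck {
  static final Logger LOG = LogManager.getLogger("zipkin2.dependencies.LogCheck");

  public static void main(String[] args) {
    // logger.zipkin2 pins zipkin2.dependencies.* at info, so this prints:
    LOG.info("visible: zipkin2.dependencies is configured at info");
    // rootLogger.level=warn suppresses info from everything else:
    LogManager.getLogger("some.other.pkg").info("hidden by the warn root level");
  }
}
```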
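Finally, the MySQL job rewrites its query with two Java-17-era niceties: a text block whose trailing backslashes suppress the embedded newlines (so the SQL stays one logical line, exactly as the old concatenated string did) and the instance method `String::formatted` in place of static `String.format`, the same swap made for the JDBC URL in `MySQLContainer`. A reduced sketch; the column list and bounds are placeholders, not the job's real ones:

```java
public class TextBlockQuery {
  public static void main(String[] args) {
    String fields = "s.trace_id, s.parent_id, s.id"; // placeholder columns
    // The trailing backslash is the text-block escape for "no newline here",
    // so the formatted result is a single line of SQL.
    String linksQuery = """
        select distinct %s \
        from zipkin_spans s \
        where s.start_ts between %s and %s\
        """.formatted(fields, 1L, 2L);
    System.out.println(linksQuery);
  }
}
```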