Skip to content

Commit

Permalink
Single codemod execution performance improvements (#468)
Browse files Browse the repository at this point in the history
* Only load codemod resources if needed (a codemod for the provider is
active)
* Cache AppScan location data
  • Loading branch information
nahsra authored Nov 17, 2024
1 parent d9b49a0 commit 0fd8818
Show file tree
Hide file tree
Showing 10 changed files with 140 additions and 58 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -366,10 +366,15 @@ public Integer call() throws IOException {
log.debug("excluding paths: {}", pathExcludes);

// get all files that match
log.debug("Listing source directories");
List<SourceDirectory> sourceDirectories =
sourceDirectoryLister.listJavaSourceDirectories(List.of(projectDirectory));

log.debug("Listing files");
List<Path> filePaths = fileFinder.findFiles(projectPath, includesExcludes);

log.debug("Creating codemod regulator");

// get codemod includes/excludes
final CodemodRegulator regulator;
if (codemodIncludes != null && codemodExcludes != null) {
Expand All @@ -386,10 +391,13 @@ public Integer call() throws IOException {
}

// create the loader
log.debug("Loading input files");
CodeDirectory codeDirectory = new DefaultCodeDirectory(projectPath);
List<Path> sarifFiles = convertToPaths(sarifs);
List<Path> sonarIssuesJsonFiles = convertToPaths(sonarIssuesJsonFilePaths);
List<Path> sonarHotspotJsonFiles = convertToPaths(sonarHotspotsJsonFilePaths);

log.debug("Parsing SARIFs");
Map<String, List<RuleSarif>> pathSarifMap =
SarifParser.create().parseIntoMap(sarifFiles, codeDirectory);
List<ParameterArgument> codemodParameters =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
import java.util.regex.Pattern;
import javax.inject.Inject;
import org.jetbrains.annotations.VisibleForTesting;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/** This type is responsible for loading codemods and the surrounding subsystem. */
public final class CodemodLoader {
Expand All @@ -33,6 +35,8 @@ public CodemodLoader(
final Path defectDojoFindingsJsonFile,
final Path contrastVulnerabilitiesXmlFilePath) {

log.debug("Loading providers");

// get all the providers ready for dependency injection & codemod instantiation
final List<CodemodProvider> providers =
ServiceLoader.load(CodemodProvider.class).stream()
Expand Down Expand Up @@ -106,6 +110,7 @@ public CodemodLoader(
wantsSarif.stream()
.flatMap(toolName -> ruleSarifByTool.getOrDefault(toolName, List.of()).stream())
.toList();
log.debug("Loading modules from provider: {}", provider.getClass().getSimpleName());
final Set<AbstractModule> modules =
provider.getModules(
repositoryDir,
Expand All @@ -125,7 +130,9 @@ public CodemodLoader(
final List<CodemodIdPair> codemods = new ArrayList<>();

// validate and instantiate the codemods
log.debug("Instantiating codemods");
final Injector injector = Guice.createInjector(allModules);
log.debug("Codemods instantiated");
final Set<String> codemodIds = new HashSet<>();
for (final Class<? extends CodeChanger> type : orderedCodemodTypes) {
final Codemod codemodAnnotation = type.getAnnotation(Codemod.class);
Expand Down Expand Up @@ -164,6 +171,8 @@ static boolean isValidCodemodId(final String codemodId) {
return codemodIdPattern.matcher(codemodId).matches();
}

private static final Logger log = LoggerFactory.getLogger(CodemodLoader.class);

private static final Pattern codemodIdPattern =
Pattern.compile("^([A-Za-z0-9]+):(([A-Za-z0-9]+)/)+([A-Za-z0-9\\-\\.]+)$");
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import com.contrastsecurity.sarif.SarifSchema210;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
Expand All @@ -17,10 +18,14 @@ final class DefaultSarifParser implements SarifParser {

private Optional<SarifSchema210> readSarifFile(final Path sarifFile) {
try {
return Optional.of(
new ObjectMapper().readValue(Files.newInputStream(sarifFile), SarifSchema210.class));
log.debug("Reading input file: {}", sarifFile);
InputStream stream = Files.newInputStream(sarifFile);
log.debug("Parsing to SARIF input files");
SarifSchema210 sarif = new ObjectMapper().readValue(stream, SarifSchema210.class);
log.debug("Parsed SARIF input files");
return Optional.of(sarif);
} catch (final IOException e) {
LOG.error("Problem deserializing SARIF file: {}", sarifFile, e);
log.error("Problem deserializing SARIF file: {}", sarifFile, e);
return Optional.empty();
}
}
Expand All @@ -33,14 +38,15 @@ private Optional<Map.Entry<String, RuleSarif>> tryToBuild(
final CodeDirectory codeDirectory,
final List<RuleSarifFactory> factories) {
for (final var factory : factories) {
log.debug("Building SARIF: {}", factory.getClass().getSimpleName());
final var maybeRuleSarif =
factory.build(toolName, rule.ruleId, rule.messageText, sarif, codeDirectory);
if (maybeRuleSarif.isPresent()) {
return Optional.of(Map.entry(toolName, maybeRuleSarif.get()));
}
}

LOG.info("Found SARIF from unsupported tool: {}", toolName);
log.info("Found SARIF from unsupported tool: {}", toolName);
return Optional.empty();
}

Expand All @@ -60,7 +66,7 @@ private RuleDescriptor extractRuleId(final Result result, final Run run) {
if (maybeRule.isPresent()) {
return maybeRule.get();
} else {
LOG.info("Could not find rule id for result.");
log.info("Could not find rule id for result.");
return null;
}
}
Expand All @@ -72,10 +78,12 @@ private Stream<Map.Entry<String, RuleSarif>> fromSarif(
final Run run, final SarifSchema210 sarif, final CodeDirectory codeDirectory) {
// driver name
final var toolName = run.getTool().getDriver().getName();
log.debug("Loading SARIF rule factories");
final List<RuleSarifFactory> factories =
ServiceLoader.load(RuleSarifFactory.class).stream()
.map(ServiceLoader.Provider::get)
.toList();
log.debug("Done loading SARIF rule factories");
final var runResults = run.getResults();
final var allResults =
runResults != null
Expand Down Expand Up @@ -105,16 +113,18 @@ public Map<String, List<RuleSarif>> parseIntoMap(
.flatMap(
sarif -> sarif.getRuns().stream().flatMap(run -> fromSarif(run, sarif, codeDirectory)))
.forEach(
p ->
map.merge(
p.getKey(),
new ArrayList<>(Collections.singletonList(p.getValue())),
(l1, l2) -> {
l1.add(l2.get(0));
return l1;
}));
p -> {
log.debug("Merging SARIF results");
map.merge(
p.getKey(),
new ArrayList<>(Collections.singletonList(p.getValue())),
(l1, l2) -> {
l1.add(l2.get(0));
return l1;
});
});
return map;
}

private static final Logger LOG = LoggerFactory.getLogger(DefaultSarifParser.class);
private static final Logger log = LoggerFactory.getLogger(DefaultSarifParser.class);
}
Original file line number Diff line number Diff line change
@@ -1,58 +1,32 @@
package io.codemodder.providers.sarif.appscan;

import com.contrastsecurity.sarif.*;
import io.codemodder.CodeDirectory;
import io.codemodder.RuleSarif;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Path;
import java.util.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/** A {@link RuleSarif} for AppScan results. */
final class AppScanRuleSarif implements RuleSarif {

private final SarifSchema210 sarif;
private final String messageText;
private final Map<Path, List<Result>> resultsCache;
private final List<String> locations;

/** A map of a AppScan SARIF "location" URIs mapped to their respective file paths. */
private final Map<Path, Set<Integer>> artifactLocationIndices;
private final AppScanSarifLocationData sarifLocationData;

/**
* Creates an {@link AppScanRuleSarif} that has already done the work of mapping AppScan SARIF
* locations, which are strange combinations of class name and file path, into predictable paths.
*/
AppScanRuleSarif(
final String messageText, final SarifSchema210 sarif, final CodeDirectory codeDirectory) {
final String messageText,
final SarifSchema210 sarif,
final AppScanSarifLocationData sarifLocationData) {
this.sarif = Objects.requireNonNull(sarif);
this.messageText = Objects.requireNonNull(messageText);
this.resultsCache = new HashMap<>();
this.locations =
sarif.getRuns().get(0).getArtifacts().stream()
.map(Artifact::getLocation)
.map(ArtifactLocation::getUri)
.map(u -> u.substring(8)) // trim the file:/// of all results
.toList();
Map<Path, Set<Integer>> artifactLocationIndicesMap = new HashMap<>();

for (int i = 0; i < locations.size(); i++) {
final Integer index = i;
String path = locations.get(i);
path = path.replace('\\', '/');
// we have a real but partial path, now we have to find it in the repository
Optional<Path> existingRealPath;
try {
existingRealPath = codeDirectory.findFilesWithTrailingPath(path);
} catch (IOException e) {
throw new UncheckedIOException(e);
}

// add to the map if we found a matching file
existingRealPath.ifPresent(
p -> artifactLocationIndicesMap.computeIfAbsent(p, k -> new HashSet<>()).add(index));
}
this.artifactLocationIndices = Map.copyOf(artifactLocationIndicesMap);
this.sarifLocationData = Objects.requireNonNull(sarifLocationData);
}

@Override
Expand All @@ -71,6 +45,9 @@ public List<Region> getRegionsFromResultsByRule(final Path path) {
*/
@Override
public List<Result> getResultsByLocationPath(final Path path) {

Map<Path, Set<Integer>> artifactLocationIndices =
sarifLocationData.getArtifactLocationIndices();
return resultsCache.computeIfAbsent(
path,
p ->
Expand Down Expand Up @@ -117,4 +94,6 @@ public String getRule() {
}

static final String toolName = "HCL AppScan Static Analyzer";

private static final Logger log = LoggerFactory.getLogger(AppScanRuleSarif.class);
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,18 @@
import io.codemodder.CodeDirectory;
import io.codemodder.RuleSarif;
import io.codemodder.RuleSarifFactory;
import java.util.Optional;
import java.util.*;

/** A factory for building {@link AppScanRuleSarif}s. */
public final class AppScanRuleSarifFactory implements RuleSarifFactory {

/** A map of a AppScan SARIF "location" URIs mapped to their respective file paths. */
private final Map<SarifSchema210, AppScanSarifLocationData> sarifLocationDataCache;

public AppScanRuleSarifFactory() {
this.sarifLocationDataCache = new HashMap<>();
}

@Override
public Optional<RuleSarif> build(
final String toolName,
Expand All @@ -17,7 +24,12 @@ public Optional<RuleSarif> build(
final SarifSchema210 sarif,
final CodeDirectory codeDirectory) {
if (AppScanRuleSarif.toolName.equals(toolName)) {
return Optional.of(new AppScanRuleSarif(messageText, sarif, codeDirectory));
AppScanSarifLocationData sarifLocationData = sarifLocationDataCache.get(sarif);
if (sarifLocationData == null) {
sarifLocationData = new AppScanSarifLocationData(sarif, codeDirectory);
sarifLocationDataCache.put(sarif, sarifLocationData);
}
return Optional.of(new AppScanRuleSarif(messageText, sarif, sarifLocationData));
}
return Optional.empty();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package io.codemodder.providers.sarif.appscan;

import com.contrastsecurity.sarif.Artifact;
import com.contrastsecurity.sarif.ArtifactLocation;
import com.contrastsecurity.sarif.SarifSchema210;
import io.codemodder.CodeDirectory;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.file.Path;
import java.util.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/** Holds the locations of the artifacts in the SARIF file. */
final class AppScanSarifLocationData {

private final Map<Path, Set<Integer>> artifactLocationIndices;

AppScanSarifLocationData(final SarifSchema210 sarif, final CodeDirectory codeDirectory) {
log.debug("Cleaning locations");
List<String> locations =
sarif.getRuns().get(0).getArtifacts().stream()
.map(Artifact::getLocation)
.map(ArtifactLocation::getUri)
.map(u -> u.substring(8)) // trim the file:/// of all results
.toList();

Map<Path, Set<Integer>> artifactLocationIndicesMap = new HashMap<>();

log.debug("Calculating locations in project dir");
for (int i = 0; i < locations.size(); i++) {
final Integer index = i;
String path = locations.get(i);
path = path.replace('\\', '/');
// we have a real but partial path, now we have to find it in the repository
Optional<Path> existingRealPath;
try {
existingRealPath = codeDirectory.findFilesWithTrailingPath(path);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
// add to the map if we found a matching file
existingRealPath.ifPresent(
p -> artifactLocationIndicesMap.computeIfAbsent(p, k -> new HashSet<>()).add(index));
}
log.debug("Done calculating locations");
this.artifactLocationIndices = Map.copyOf(artifactLocationIndicesMap);
}

Map<Path, Set<Integer>> getArtifactLocationIndices() {
return artifactLocationIndices;
}

private static final Logger log = LoggerFactory.getLogger(AppScanSarifLocationData.class);
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
final class AppScanModuleTest {

@Codemod(
id = "appscan-test:java/finds-stuff",
id = "appscan:java/finds-stuff",
importance = Importance.LOW,
reviewGuidance = ReviewGuidance.MERGE_AFTER_CURSORY_REVIEW)
static class AppScanSarifTestCodemod implements CodeChanger {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@
import com.contrastsecurity.sarif.Result;
import com.contrastsecurity.sarif.SarifSchema210;
import com.google.inject.AbstractModule;
import io.codemodder.CodeChanger;
import io.codemodder.LazyLoadingRuleSarif;
import io.codemodder.RuleSarif;
import io.codemodder.*;
import io.github.classgraph.*;
import java.lang.reflect.Executable;
import java.lang.reflect.Parameter;
Expand Down Expand Up @@ -83,6 +81,11 @@ protected void configure() {
}
}

if (scanTargets.isEmpty()) {
LOG.trace("No @PmdScan annotations found, skipping");
return;
}

SarifSchema210 allRulesBatchedRun =
pmdRunner.run(
scanTargets.stream().map(PmdScanTarget::pmdScan).map(PmdScan::ruleId).toList(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public abstract class MultipleDeclarations {
}

@Codemod(
id = "pmd-test:java/my-pmd-codemod",
id = "pmd:java/my-pmd-codemod",
importance = Importance.HIGH,
reviewGuidance = ReviewGuidance.MERGE_AFTER_CURSORY_REVIEW)
static class UsesPmdCodemod extends SarifPluginJavaParserChanger<VariableDeclarator> {
Expand Down
Loading

0 comments on commit 0fd8818

Please sign in to comment.