Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change queryname sort to match samtools #1601

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 59 additions & 36 deletions src/main/java/htsjdk/samtools/SAMRecordQueryNameComparator.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,42 +31,15 @@
public class SAMRecordQueryNameComparator implements SAMRecordComparator, Serializable {
private static final long serialVersionUID = 1L;

private static boolean isDigit(final char c) {
return Character.isDigit(c);
}

@Override
public int compare(final SAMRecord samRecord1, final SAMRecord samRecord2) {
int cmp = fileOrderCompare(samRecord1, samRecord2);
if (cmp != 0) {
return cmp;
}

final boolean r1Paired = samRecord1.getReadPairedFlag();
final boolean r2Paired = samRecord2.getReadPairedFlag();

if (r1Paired || r2Paired) {
if (!r1Paired) return 1;
else if (!r2Paired) return -1;
else if (samRecord1.getFirstOfPairFlag() && samRecord2.getSecondOfPairFlag()) return -1;
else if (samRecord1.getSecondOfPairFlag() && samRecord2.getFirstOfPairFlag()) return 1;
}

if (samRecord1.getReadNegativeStrandFlag() != samRecord2.getReadNegativeStrandFlag()) {
return (samRecord1.getReadNegativeStrandFlag()? 1: -1);
}
if (samRecord1.isSecondaryAlignment() != samRecord2.isSecondaryAlignment()) {
return samRecord2.isSecondaryAlignment()? -1: 1;
}
if (samRecord1.getSupplementaryAlignmentFlag() != samRecord2.getSupplementaryAlignmentFlag()) {
return samRecord2.getSupplementaryAlignmentFlag() ? -1 : 1;
}
final Integer hitIndex1 = samRecord1.getIntegerAttribute(SAMTag.HI);
final Integer hitIndex2 = samRecord2.getIntegerAttribute(SAMTag.HI);
if (hitIndex1 != null) {
if (hitIndex2 == null) return 1;
else {
cmp = hitIndex1.compareTo(hitIndex2);
if (cmp != 0) return cmp;
}
} else if (hitIndex2 != null) return -1;
return 0;
final int retval = compareReadNames(samRecord1.getReadName(), samRecord2.getReadName());
if (retval == 0) return Integer.compare(samRecord1.getFlags()&0xc0, samRecord2.getFlags()&0xc0);
else return retval;
}

/**
Expand All @@ -85,7 +58,57 @@ public int fileOrderCompare(final SAMRecord samRecord1, final SAMRecord samRecor
* Encapsulate algorithm for comparing read names in queryname-sorted file, since there have been
* conversations about changing the behavior.
*/
public static int compareReadNames(final String readName1, final String readName2) {
return readName1.compareTo(readName2);
public static int compareReadNames(final String name1, final String name2) {
int index1 = 0;
int index2 = 0;
final int len1 = name1.length();
final int len2 = name2.length();

// keep going while we have characters left
while (index1 < len1 && index2 < len2) {
if (!isDigit(name1.charAt(index1)) || !isDigit(name2.charAt(index2))) { // no more
if (name1.charAt(index1) != name2.charAt(index2)) {
return (int) name1.charAt(index1) - (int) name2.charAt(index2);
} else {
index1++;
index2++;
}
} else { // next characters are digits
// skip over leading zeros
while (index1 < len1 && name1.charAt(index1) == '0') index1++;
while (index2 < len2 && name2.charAt(index2) == '0') index2++;
// skip over any matching digits
while (index1 < len1 && index2 < len2 &&
isDigit(name1.charAt(index1)) &&
isDigit(name2.charAt(index2)) &&
name1.charAt(index1) == name2.charAt(index2)) {
index1++;
index2++;
}

boolean isDigit1 = index1 < len1 && isDigit(name1.charAt(index1));
boolean isDigit2 = index2 < len2 && isDigit(name2.charAt(index2));

if (isDigit1 && isDigit2) {
// skip until we hit a non-digit or the end.
int i = 0;
while (index1 + i < len1 && index2 + i < len2 &&
isDigit(name1.charAt(index1 + i)) &&
isDigit(name2.charAt(index2 + i))) {
i++;
}
if (index1 + i < len1 && isDigit(name1.charAt(index1 + i))) return 1;
else if (index2 + i < len2 && isDigit(name2.charAt(index2 + i))) return -1;
else return (int)name1.charAt(index1) - (int)name2.charAt(index2);
}
else if (isDigit1) return 1;
else if (isDigit2) return -1;
else if (index1 != index2) return index2 - index1;
}
}

if (index1 < len1) return 1;
else if (index2 < len2) return -1;
else return 0;
}
}
90 changes: 23 additions & 67 deletions src/test/java/htsjdk/samtools/SAMRecordQueryNameComparatorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -49,76 +49,32 @@ public void testCompareDifferentNames() throws Exception {
Assert.assertTrue(COMPARATOR.compare(b, a) > 0);
}

private static SAMRecord copyAndSet(final SAMRecord record, final Consumer<SAMRecord> setParams) {
final SAMRecord copy = record.deepCopy();
setParams.accept(copy);
return copy;
}


// this test cases are separated for the different names comparison for re-use with SAMRecordQueryHashComparator
@DataProvider
public static Object[][] equalNameComparisonData() {
// base record with the information used in the comparator:
// - read name A
// - positive strand -> default
// - unpaired (without first/second of pair flags) -> explicitly set
// - primary alignment (and not supplementary) -> explicitly set
// - no hit index (HI tag) -> default
final SAMRecord record = new SAMRecord(null);
record.setReadName("A");
record.setReadPairedFlag(false);
record.setFirstOfPairFlag(false);
record.setSecondOfPairFlag(false);
// primary/secondary/supplementary alignments
record.setSecondaryAlignment(false);
record.setSupplementaryAlignmentFlag(false);

// record1, record2, comparison value
return new Object[][] {
// same record is equals after comparing all the fields
{record, record, 0},

// upaired vs. paired
{record, copyAndSet(record, (r) -> r.setReadPairedFlag(true)), 1},
{copyAndSet(record, (r) -> r.setReadPairedFlag(true)), record, -1},
// first/second of pair in natural order
{copyAndSet(record, r -> {r.setReadPairedFlag(true); r.setFirstOfPairFlag(true);}),
copyAndSet(record, r -> {r.setReadPairedFlag(true); r.setSecondOfPairFlag(true);}),
-1},
{copyAndSet(record, r -> {r.setReadPairedFlag(true); r.setSecondOfPairFlag(true);}),
copyAndSet(record, r -> {r.setReadPairedFlag(true); r.setFirstOfPairFlag(true);}),
1},

// negative strand is the last
{record, copyAndSet(record, r -> r.setReadNegativeStrandFlag(true)), -1},

// primary alignment is first compared to not primary
{record, copyAndSet(record, r -> r.setSecondaryAlignment(true)), -1},
{copyAndSet(record, r -> r.setSecondaryAlignment(true)), record, 1},
// secondary alignment is last compared to primary
{record, copyAndSet(record, r -> r.setSupplementaryAlignmentFlag(true)), -1},
{copyAndSet(record, r -> r.setSupplementaryAlignmentFlag(true)), record, 1},

// the one with HI tag is first if the other is null
{record, copyAndSet(record, r -> r.setAttribute(SAMTag.HI, 1)), -1},
{copyAndSet(record, r -> r.setAttribute(SAMTag.HI, 1)), record, 1},
// if both have HI tag, order by it
{copyAndSet(record, r -> r.setAttribute(SAMTag.HI, 1)),
copyAndSet(record, r -> r.setAttribute(SAMTag.HI, 1)), 0},
{copyAndSet(record, r -> r.setAttribute(SAMTag.HI, 1)),
copyAndSet(record, r -> r.setAttribute(SAMTag.HI, 2)), -1},
{copyAndSet(record, r -> r.setAttribute(SAMTag.HI, 16)),
copyAndSet(record, r -> r.setAttribute(SAMTag.HI, 5)), 1}
@DataProvider(name = "readNameOrderTestCases")
public Object[][] readNameOrderTestCases() {
// See: https://sourceforge.net/p/samtools/mailman/message/26674128/
final String[] names1 = {"#01", "#2", ".1", ".09", "a001", "a01", "a01z", "a1", "a1a"};
// See: https://github.com/samtools/samtools/blob/bdc5bb81c11f2ab25ea97351213cb87b33857c4d/test/sort/name.sort.expected.sam#L14-L28
final String[] names2 = {"r000", "r001", "r002", "r003", "r004", "u1", "x1", "x2", "x3", "x4", "x5", "x6"};
return new Object[][]{
names1, names2
};
}



@Test(dataProvider = "equalNameComparisonData")
public void testCompareEqualNames(final SAMRecord record1, final SAMRecord record2, final int sign) throws Exception {
final int comparisonResult = COMPARATOR.compare(record1, record2);
Assert.assertEquals(Integer.signum(comparisonResult),sign);
@Test(dataProvider = "readNameOrderTestCases")
public void testReadNameOrder(final String[] names) {
final SAMRecord a = new SAMRecord(null);
final SAMRecord b = new SAMRecord(null);
int i, j;
for (i = 0; i < names.length; i++) {
for (j = 0; j < names.length; j++) {
a.setReadName(names[i]);
b.setReadName(names[j]);
final int actual = Integer.compare(COMPARATOR.compare(a, b), 0);
final int expected = Integer.compare(i, j);
Assert.assertEquals(actual, expected, names[i] + " < " + names[j]);
}
}
}


}