diff --git a/Data/salami-data-public b/Data/salami-data-public new file mode 120000 index 0000000..a66aeab --- /dev/null +++ b/Data/salami-data-public @@ -0,0 +1 @@ +/Users/ben/src/salami-data-public \ No newline at end of file diff --git a/Data/test_tracks.txt b/Data/test_tracks.txt index bdad88d..d7d6f00 100644 --- a/Data/test_tracks.txt +++ b/Data/test_tracks.txt @@ -1,46 +1,106 @@ -4.m4a -40.m4a -46.m4a -5.m4a -6.m4a -8.m4a -955.mp3 -956.mp3 -957.mp3 -958.mp3 -959.mp3 -960.mp3 -962.mp3 -963.mp3 +1166.mp3 +1090.mp3 +584.m4a +346.m4a +1026.mp3 +1142.mp3 +1302.mp3 +1131.mp3 +608.m4a +1274.mp3 +1376.mp3 +670.m4a +1399.mp3 +1319.mp3 +18.m4a +1123.mp3 +342.m4a +10013.mp3 +642.m4a +306.m4a +1488.mp3 +516.m4a +1192.mp3 +10024.mp3 +1357.mp3 +404.m4a +1063.mp3 +1331.mp3 +1356.mp3 +1322.mp3 +1170.mp3 +1440.mp3 +1091.mp3 964.mp3 -965.mp3 -966.mp3 -967.mp3 -968.mp3 +1436.mp3 +1414.mp3 +1474.mp3 +1036.mp3 +1040.mp3 +426.m4a +1087.mp3 +1301.mp3 970.mp3 -971.mp3 -972.mp3 -973.mp3 -974.mp3 -975.mp3 -976.mp3 -978.mp3 -979.mp3 -980.mp3 -981.mp3 -982.mp3 -983.mp3 -984.mp3 -986.mp3 -987.mp3 -988.mp3 -989.mp3 -990.mp3 -991.mp3 +1141.mp3 +1250.mp3 +1483.mp3 992.mp3 -994.mp3 -995.mp3 -996.mp3 -997.mp3 -998.mp3 -999.mp3 +1223.mp3 +1284.mp3 +10012.mp3 +472.m4a +6.m4a +986.mp3 +678.m4a +1227.mp3 +1152.mp3 +5.m4a +1270.mp3 +488.m4a +1311.mp3 +1421.mp3 +1402.mp3 +522.m4a +354.m4a +1276.mp3 +1339.mp3 +1236.mp3 +1445.mp3 +1221.mp3 +1244.mp3 +1080.mp3 +824.m4a +752.m4a +10043.mp3 +543.m4a +587.m4a +818.m4a +950.m4a +615.m4a +1640.m4a +1654.m4a +663.m4a +1648.m4a +359.m4a +1602.m4a +39.m4a +1624.m4a +807.m4a +459.m4a +355.m4a +728.m4a +531.m4a +549.m4a +10050.mp3 +437.m4a +855.m4a +951.m4a +653.m4a +879.m4a +935.m4a +835.m4a +629.m4a +10051.mp3 +541.m4a +893.m4a +341.m4a diff --git a/Data/train_tracks.txt b/Data/train_tracks.txt index 95ed51d..d6a95cc 100644 --- a/Data/train_tracks.txt +++ b/Data/train_tracks.txt @@ -1,449 +1,866 @@ -10.m4a -1000.mp3 -1003.mp3 +10007.mp3 +484.m4a +1136.mp3 +1343.mp3 +1027.mp3 +971.mp3 +1130.mp3 +10032.mp3 +991.mp3 +616.m4a +1076.mp3 +478.m4a +1300.mp3 +1333.mp3 +1395.mp3 +440.m4a 1004.mp3 -1005.mp3 -1006.mp3 -1007.mp3 -1008.mp3 -1011.mp3 -1012.mp3 -1013.mp3 -1014.mp3 -1015.mp3 -1018.mp3 -1019.mp3 -1020.mp3 -1021.mp3 -1022.mp3 -1023.mp3 +1372.mp3 +512.m4a +1155.mp3 +1397.mp3 +1485.mp3 1024.mp3 -1026.mp3 -1027.mp3 -1029.mp3 +1093.mp3 +660.m4a +1254.mp3 +1460.mp3 +1149.mp3 +338.m4a +1396.mp3 +52.m4a +987.mp3 +1384.mp3 +1423.mp3 +594.m4a +1107.mp3 +1410.mp3 1030.mp3 -1032.mp3 -1034.mp3 +1403.mp3 +14.m4a +20.m4a +480.m4a +1455.mp3 +37.m4a +995.mp3 +1430.mp3 +1147.mp3 +1392.mp3 +1164.mp3 +1205.mp3 +626.m4a +1182.mp3 +444.m4a +1448.mp3 +4.m4a +1374.mp3 +996.mp3 +1328.mp3 +1365.mp3 +1358.mp3 +989.mp3 +1478.mp3 +1157.mp3 +1144.mp3 +1286.mp3 +384.m4a +1179.mp3 +1404.mp3 +1256.mp3 +974.mp3 +1271.mp3 +498.m4a +1327.mp3 +618.m4a +1354.mp3 +966.mp3 +955.mp3 1035.mp3 -1036.mp3 -1037.mp3 -1038.mp3 -1039.mp3 -1040.mp3 -1042.mp3 -1043.mp3 -1044.mp3 -1045.mp3 1046.mp3 -1047.mp3 +1352.mp3 +10023.mp3 +1224.mp3 +1204.mp3 +1038.mp3 +1059.mp3 +534.m4a +420.m4a +1490.mp3 +474.m4a +1243.mp3 +1086.mp3 +1226.mp3 1048.mp3 -1051.mp3 +1476.mp3 +1214.mp3 +10033.mp3 +1162.mp3 +340.m4a +13.m4a +10025.mp3 +450.m4a +1138.mp3 +1359.mp3 +1219.mp3 +10.m4a +1202.mp3 +965.mp3 +1023.mp3 +1375.mp3 +1140.mp3 +1039.mp3 +1083.mp3 +1092.mp3 1052.mp3 -1053.mp3 -1054.mp3 +1310.mp3 +1462.mp3 +10021.mp3 +1007.mp3 +690.m4a +1242.mp3 +1120.mp3 +1496.mp3 +576.m4a +1167.mp3 +652.m4a 1055.mp3 -1056.mp3 -1058.mp3 -1059.mp3 -1060.mp3 
-1061.mp3 -1062.mp3 -1063.mp3 +1419.mp3 +676.m4a +416.m4a +1316.mp3 +1288.mp3 +634.m4a +1299.mp3 +648.m4a +1268.mp3 +1078.mp3 +1459.mp3 +524.m4a +978.mp3 +1114.mp3 +614.m4a +1218.mp3 1064.mp3 -1066.mp3 -1067.mp3 -1068.mp3 -1069.mp3 -1070.mp3 -1071.mp3 -1072.mp3 +1463.mp3 +612.m4a +1122.mp3 +1232.mp3 +1258.mp3 +408.m4a +1408.mp3 +402.m4a +1306.mp3 1074.mp3 -1075.mp3 -1076.mp3 -1077.mp3 -1078.mp3 +983.mp3 +1069.mp3 +8.m4a +1126.mp3 +1335.mp3 +1062.mp3 +10008.mp3 +370.m4a +1272.mp3 +1326.mp3 +1429.mp3 +1124.mp3 +320.m4a +1196.mp3 +1464.mp3 +1350.mp3 +12.m4a +1099.mp3 +1054.mp3 +1435.mp3 +1439.mp3 +372.m4a +1269.mp3 +568.m4a +1422.mp3 +10020.mp3 +10009.mp3 +307.m4a +1109.mp3 +1206.mp3 +1318.mp3 +350.m4a +1450.mp3 +360.m4a +963.mp3 +476.m4a +1251.mp3 +1132.mp3 +1011.mp3 +1424.mp3 +492.m4a +1005.mp3 +1266.mp3 1079.mp3 -1080.mp3 -1082.mp3 -1083.mp3 +1115.mp3 +1360.mp3 +1175.mp3 +1431.mp3 +1294.mp3 +520.m4a +1245.mp3 +410.m4a +1239.mp3 +468.m4a +16.m4a +1195.mp3 +1151.mp3 +1493.mp3 1084.mp3 -1085.mp3 -1086.mp3 -1087.mp3 -1088.mp3 -1090.mp3 -1091.mp3 -1092.mp3 -1093.mp3 -1095.mp3 -1096.mp3 -1098.mp3 -1099.mp3 -1101.mp3 -1102.mp3 -1103.mp3 +1240.mp3 +1378.mp3 +1037.mp3 +988.mp3 +324.m4a 1104.mp3 -1106.mp3 -1107.mp3 -1108.mp3 -1109.mp3 -1110.mp3 -1111.mp3 +979.mp3 +424.m4a +1467.mp3 +975.mp3 +364.m4a +1171.mp3 +10026.mp3 +1285.mp3 +668.m4a +1189.mp3 +1291.mp3 +596.m4a +1261.mp3 +1072.mp3 +442.m4a +356.m4a +1148.mp3 +956.mp3 +1070.mp3 +482.m4a +396.m4a +1067.mp3 +486.m4a 1112.mp3 -1114.mp3 -1115.mp3 -1116.mp3 -1117.mp3 -1118.mp3 +358.m4a +982.mp3 +1173.mp3 +334.m4a +1262.mp3 +1412.mp3 +1315.mp3 +1309.mp3 +1106.mp3 +1287.mp3 +570.m4a +1389.mp3 +1135.mp3 1119.mp3 -1120.mp3 -1122.mp3 -1123.mp3 -1124.mp3 +1407.mp3 +1075.mp3 +666.m4a +1207.mp3 +1367.mp3 +1362.mp3 +1451.mp3 +998.mp3 +1246.mp3 +1381.mp3 +1101.mp3 +1003.mp3 1125.mp3 -1126.mp3 -1127.mp3 -1128.mp3 -1130.mp3 -1131.mp3 -1132.mp3 -1133.mp3 -1134.mp3 -1135.mp3 -1136.mp3 -1138.mp3 -1139.mp3 -1140.mp3 -1141.mp3 -1142.mp3 -1143.mp3 -1144.mp3 -1146.mp3 -1147.mp3 -1148.mp3 -1149.mp3 -1150.mp3 -1151.mp3 -1152.mp3 +1386.mp3 +536.m4a +1238.mp3 +1095.mp3 +994.mp3 +1088.mp3 +394.m4a +46.m4a 1154.mp3 -1155.mp3 -1156.mp3 -1157.mp3 +1264.mp3 +1077.mp3 +1188.mp3 +1472.mp3 +1134.mp3 +1293.mp3 +1117.mp3 +1053.mp3 +658.m4a +1461.mp3 +422.m4a +1215.mp3 +1045.mp3 +317.m4a 1158.mp3 +1346.mp3 +1194.mp3 +1446.mp3 +10022.mp3 1159.mp3 -1160.mp3 -1162.mp3 -1163.mp3 -1164.mp3 -1165.mp3 -1166.mp3 -1167.mp3 +1368.mp3 +1332.mp3 +1096.mp3 +502.m4a +1394.mp3 1168.mp3 -1170.mp3 -1171.mp3 -1172.mp3 -1173.mp3 +1181.mp3 +610.m4a +392.m4a +322.m4a +1371.mp3 +39.m4a +560.m4a +1180.mp3 +1338.mp3 +1443.mp3 +1111.mp3 +1432.mp3 +532.m4a +496.m4a +1482.mp3 +981.mp3 +311.m4a +366.m4a +694.m4a +1212.mp3 +1102.mp3 +997.mp3 +646.m4a +1042.mp3 +1060.mp3 1174.mp3 -1175.mp3 +1382.mp3 +959.mp3 +554.m4a +510.m4a +1247.mp3 +1213.mp3 +323.m4a +10017.mp3 +1082.mp3 +1110.mp3 +1307.mp3 +1495.mp3 +1296.mp3 +10016.mp3 +1108.mp3 +1364.mp3 +1470.mp3 +1021.mp3 +1492.mp3 +1484.mp3 +654.m4a +504.m4a +30.m4a +1235.mp3 +10027.mp3 +1211.mp3 1176.mp3 -1178.mp3 -1179.mp3 -1180.mp3 -1181.mp3 -1182.mp3 -1183.mp3 +1015.mp3 +574.m4a +1314.mp3 +1494.mp3 +1405.mp3 +999.mp3 +10014.mp3 +990.mp3 +1071.mp3 1184.mp3 -1186.mp3 -1187.mp3 -1188.mp3 -1189.mp3 -1190.mp3 -1191.mp3 -1192.mp3 -1194.mp3 -1195.mp3 -1196.mp3 -1197.mp3 -1198.mp3 +506.m4a +1336.mp3 1199.mp3 -12.m4a -1200.mp3 -1202.mp3 -1203.mp3 -1204.mp3 -1205.mp3 -1206.mp3 -1207.mp3 -1208.mp3 -1210.mp3 -1211.mp3 -1212.mp3 -1213.mp3 -1214.mp3 -1215.mp3 -1216.mp3 -1218.mp3 
-1219.mp3 -1220.mp3 -1221.mp3 1222.mp3 -1223.mp3 -1224.mp3 -1226.mp3 -1227.mp3 -1228.mp3 -1229.mp3 -1230.mp3 -1231.mp3 -1232.mp3 -1234.mp3 -1235.mp3 -1236.mp3 -1237.mp3 -1238.mp3 -1239.mp3 -1240.mp3 -1242.mp3 -1243.mp3 -1244.mp3 -1245.mp3 -1246.mp3 -1247.mp3 -1248.mp3 -1250.mp3 -1251.mp3 +976.mp3 +1128.mp3 +1044.mp3 +1000.mp3 +1051.mp3 +1442.mp3 +24.m4a +1210.mp3 +578.m4a +564.m4a +1032.mp3 +1437.mp3 +10029.mp3 +1406.mp3 +1379.mp3 +1347.mp3 +1456.mp3 +1438.mp3 +508.m4a +1022.mp3 +1308.mp3 +1413.mp3 +1012.mp3 +3.m4a +1127.mp3 1253.mp3 -1254.mp3 -1256.mp3 -1258.mp3 -1259.mp3 +10035.mp3 +1390.mp3 +980.mp3 +1351.mp3 +368.m4a +1317.mp3 +1150.mp3 +550.m4a +967.mp3 +630.m4a +1342.mp3 +968.mp3 1260.mp3 -1261.mp3 -1262.mp3 -1263.mp3 -1264.mp3 -1266.mp3 -1267.mp3 -1268.mp3 -1269.mp3 -1270.mp3 -1271.mp3 -1272.mp3 -1274.mp3 +1383.mp3 +1428.mp3 +590.m4a +1468.mp3 +1133.mp3 +1324.mp3 +1444.mp3 +1118.mp3 +1008.mp3 +10019.mp3 +1420.mp3 +448.m4a +606.m4a +1029.mp3 +1160.mp3 +1447.mp3 +548.m4a +1415.mp3 +604.m4a +1220.mp3 1275.mp3 -1276.mp3 -1277.mp3 -1278.mp3 +10034.mp3 +336.m4a +1186.mp3 +1469.mp3 +1475.mp3 +1454.mp3 +1434.mp3 +1418.mp3 +1014.mp3 +686.m4a +1427.mp3 +10031.mp3 1279.mp3 -1280.mp3 +1006.mp3 1282.mp3 +1325.mp3 +1172.mp3 +1280.mp3 +957.mp3 +632.m4a +1043.mp3 +556.m4a +1387.mp3 +1230.mp3 +10030.mp3 +984.mp3 +1278.mp3 +1400.mp3 +1143.mp3 +10011.mp3 +1103.mp3 +1491.mp3 +662.m4a 1283.mp3 -1284.mp3 -1285.mp3 -1286.mp3 -1287.mp3 -1288.mp3 -1290.mp3 -1291.mp3 +1334.mp3 +1068.mp3 +1228.mp3 +1066.mp3 +696.m4a +1116.mp3 +1056.mp3 +335.m4a +1348.mp3 +674.m4a 1292.mp3 -1293.mp3 -1294.mp3 -1295.mp3 -1296.mp3 -1298.mp3 -1299.mp3 -13.m4a -1300.mp3 -1301.mp3 -1302.mp3 -1303.mp3 +1156.mp3 1304.mp3 -1306.mp3 -1307.mp3 -1308.mp3 -1309.mp3 -1310.mp3 -1311.mp3 -1312.mp3 -1314.mp3 -1315.mp3 -1316.mp3 -1317.mp3 -1318.mp3 -1319.mp3 +1197.mp3 +1013.mp3 +1355.mp3 +1216.mp3 +1380.mp3 +1426.mp3 1320.mp3 -1322.mp3 -1323.mp3 -1324.mp3 -1325.mp3 -1326.mp3 -1327.mp3 -1328.mp3 -1330.mp3 -1331.mp3 -1332.mp3 -1333.mp3 -1334.mp3 -1335.mp3 -1336.mp3 -1338.mp3 -1339.mp3 +352.m4a +1267.mp3 +1085.mp3 +325.m4a +620.m4a +640.m4a +1234.mp3 +1203.mp3 +1163.mp3 +22.m4a +10018.mp3 +1479.mp3 +622.m4a +1487.mp3 +1486.mp3 +344.m4a +1200.mp3 1340.mp3 +1018.mp3 +1388.mp3 +1363.mp3 +1187.mp3 +1139.mp3 +960.mp3 +1229.mp3 +1208.mp3 +1034.mp3 +1178.mp3 +562.m4a 1341.mp3 -1342.mp3 -1343.mp3 -1346.mp3 -1347.mp3 -1348.mp3 +1303.mp3 +1477.mp3 +1058.mp3 +1020.mp3 1349.mp3 -1350.mp3 -1351.mp3 -1352.mp3 -1354.mp3 -1355.mp3 -1356.mp3 -1357.mp3 -1358.mp3 -1359.mp3 -1360.mp3 -1362.mp3 -1363.mp3 -1364.mp3 -1365.mp3 -1366.mp3 -1367.mp3 -1368.mp3 -1370.mp3 -1371.mp3 -1372.mp3 -1373.mp3 -1374.mp3 -1375.mp3 -1376.mp3 -1378.mp3 -1379.mp3 -1380.mp3 -1381.mp3 -1382.mp3 -1383.mp3 -1384.mp3 -1386.mp3 -1387.mp3 -1388.mp3 -1389.mp3 -1390.mp3 -1391.mp3 -1392.mp3 -1394.mp3 -1395.mp3 -1396.mp3 -1397.mp3 -1398.mp3 -1399.mp3 -14.m4a -1400.mp3 -1402.mp3 -1403.mp3 -1404.mp3 -1405.mp3 -1406.mp3 -1407.mp3 -1408.mp3 -1410.mp3 +650.m4a +1190.mp3 +1295.mp3 +962.mp3 +514.m4a +972.mp3 +586.m4a +1312.mp3 +664.m4a 1411.mp3 -1412.mp3 -1413.mp3 -1414.mp3 -1415.mp3 -1418.mp3 -1419.mp3 -1420.mp3 -1421.mp3 -1422.mp3 -1423.mp3 -1424.mp3 -1426.mp3 -1427.mp3 -1428.mp3 -1429.mp3 -1430.mp3 -1431.mp3 -1432.mp3 -1434.mp3 -1435.mp3 -1436.mp3 -1437.mp3 -1438.mp3 -1439.mp3 -1440.mp3 -1442.mp3 -1443.mp3 -1444.mp3 -1445.mp3 -1446.mp3 -1447.mp3 -1448.mp3 -1450.mp3 -1451.mp3 -1452.mp3 -1453.mp3 -1454.mp3 -1455.mp3 -1456.mp3 +1277.mp3 +1366.mp3 +1231.mp3 +386.m4a 1458.mp3 -1459.mp3 -1460.mp3 -1461.mp3 
-1462.mp3 -1463.mp3 -1464.mp3 -1466.mp3 -1467.mp3 -1468.mp3 -1469.mp3 -1470.mp3 -1472.mp3 -1474.mp3 -1475.mp3 -1476.mp3 -1477.mp3 -1478.mp3 -1479.mp3 -1482.mp3 -1483.mp3 -1484.mp3 -1485.mp3 -1486.mp3 -1487.mp3 -1488.mp3 -1490.mp3 -1491.mp3 -1492.mp3 -1493.mp3 -1494.mp3 -1495.mp3 -1496.mp3 +1263.mp3 +602.m4a +382.m4a +1248.mp3 +1146.mp3 +328.m4a +10028.mp3 +1061.mp3 +466.m4a +528.m4a +1452.mp3 1498.mp3 -16.m4a -18.m4a -20.m4a -22.m4a -24.m4a -3.m4a -30.m4a -306.m4a -307.m4a +636.m4a +1398.mp3 +1373.mp3 +1290.mp3 +1183.mp3 +1298.mp3 +1237.mp3 +1323.mp3 +10015.mp3 +1198.mp3 +518.m4a +10010.mp3 +1098.mp3 +1047.mp3 +1165.mp3 +1191.mp3 +348.m4a +1466.mp3 +1019.mp3 +1453.mp3 +428.m4a +624.m4a +1391.mp3 +958.mp3 +973.mp3 +1259.mp3 +1370.mp3 310.m4a -311.m4a -317.m4a -320.m4a -322.m4a -323.m4a -324.m4a -325.m4a -328.m4a -334.m4a -335.m4a -336.m4a -338.m4a -37.m4a +1330.mp3 +692.m4a +571.m4a +575.m4a +10042.mp3 +339.m4a +842.m4a +411.m4a +379.m4a +63.m4a +791.m4a +746.m4a +852.m4a +483.m4a +795.m4a +774.m4a +739.m4a +1642.m4a +732.m4a +491.m4a +27.m4a +802.m4a +882.m4a +659.m4a +43.m4a +906.m4a +691.m4a +535.m4a +371.m4a +651.m4a +455.m4a +7.m4a +675.m4a +744.m4a +399.m4a +431.m4a +75.m4a +15.m4a +51.m4a +515.m4a +836.m4a +407.m4a +551.m4a +783.m4a +846.m4a +10036.mp3 +667.m4a +892.m4a +555.m4a +832.m4a +1632.m4a +647.m4a +11.m4a +687.m4a +603.m4a +427.m4a +419.m4a +591.m4a +936.m4a +655.m4a +695.m4a +708.m4a +816.m4a +706.m4a +866.m4a +864.m4a +447.m4a +1610.m4a +511.m4a +731.m4a +704.m4a +1650.m4a +527.m4a +750.m4a +567.m4a +1644.m4a +367.m4a +363.m4a +803.m4a +702.m4a +786.m4a +559.m4a +946.m4a +1628.m4a +862.m4a +579.m4a +583.m4a +475.m4a +1620.m4a +10041.mp3 +727.m4a +343.m4a +35.m4a +707.m4a +10037.mp3 +1600.m4a +914.m4a +643.m4a +1634.m4a +627.m4a +794.m4a +683.m4a +1010.m4a +858.m4a +619.m4a +10039.mp3 +930.m4a +700.m4a +747.m4a +768.m4a +1630.m4a +10038.mp3 +1604.m4a +19.m4a +811.m4a +815.m4a +1614.m4a +471.m4a +611.m4a +607.m4a +479.m4a +799.m4a +563.m4a +635.m4a +822.m4a +669.m4a +47.m4a +910.m4a +682.m4a +599.m4a +767.m4a +1612.m4a +71.m4a +828.m4a +1626.m4a +31.m4a +23.m4a +1622.m4a +595.m4a +463.m4a +854.m4a +1638.m4a +1652.m4a +860.m4a +784.m4a +787.m4a +10040.mp3 +755.m4a +1646.m4a +631.m4a +760.m4a +770.m4a +1618.m4a +703.m4a +623.m4a +1636.m4a +519.m4a +451.m4a +726.m4a +1608.m4a +539.m4a +898.m4a +1606.m4a +814.m4a +639.m4a +834.m4a +954.m4a +790.m4a +439.m4a +79.m4a +782.m4a +803.m4a +605.m4a +10086.m4a +53.m4a +10075.m4a +823.m4a +10079.m4a +685.m4a +10087.m4a +10088.m4a +533.m4a +10081.m4a +701.m4a +901.m4a +39.m4a +39.m4a +827.m4a +525.m4a +933.m4a +10045.mp3 +389.m4a +10078.m4a +1655.m4a +10070.m4a +799.m4a +581.m4a +85.m4a +10062.m4a +597.m4a +943.m4a +565.m4a +10068.m4a +10074.m4a +445.m4a +10044.mp3 +1651.m4a +10058.m4a +829.m4a +909.m4a +557.m4a +381.m4a +621.m4a +485.m4a +931.m4a +413.m4a +357.m4a +839.m4a +10072.m4a +911.m4a +493.m4a +1635.m4a +1647.m4a +733.m4a +10091.m4a +837.m4a +10052.mp3 +1627.m4a +10054.mp3 +429.m4a +10071.m4a +10059.m4a +645.m4a +859.m4a +10063.m4a +501.m4a +21.m4a +10049.mp3 +10056.mp3 +10084.m4a +863.m4a +10090.m4a +10053.mp3 +10076.m4a +1607.m4a +895.m4a +10083.m4a +795.m4a +10048.mp3 +517.m4a +10080.m4a +853.m4a +851.m4a +847.m4a +10069.m4a +477.m4a +589.m4a +861.m4a +333.m4a +10073.m4a +10057.m4a +941.m4a +1643.m4a +677.m4a +661.m4a +10067.m4a +10082.m4a +10089.m4a +1619.m4a +1623.m4a +1615.m4a +831.m4a +10047.mp3 +397.m4a +693.m4a +10066.m4a +10055.mp3 +10046.mp3 +573.m4a +10077.m4a +819.m4a +461.m4a +10085.m4a +813.m4a +10061.m4a 
+10065.m4a
+949.m4a
+469.m4a
+309.m4a
+709.m4a
+10060.m4a
diff --git a/Python/evaluation.py b/Python/evaluation.py
index 36a2b97..05c50eb 100644
--- a/Python/evaluation.py
+++ b/Python/evaluation.py
@@ -12,11 +12,12 @@ import peakutils
 import mir_eval
 import paths
+import parameters
 
 predictions_path = '../Data/predsTestTracks_100epochs_lr005.npy'
 file_list_path = '../Data/fileListsAndIndex.pickle'
-f_measure_thresh = 3  # tolerance window in seconds
-
 def load_data(preds_file, file_lists):
     """
@@ -37,6 +38,33 @@ def load_data(preds_file, file_lists):
     return preds, test_files, test_idx
 
+def choose_preds(preds, beat_times):
+    # At test time, we apply the trained network to each position in the
+    # spectrogram of the music piece to be segmented, obtaining a boundary
+    # probability for each frame. We then employ a simple means of peak-picking
+    # on this boundary activation curve: Every output value that is not
+    # surpassed within ±6 seconds is a boundary candidate. From each candidate
+    # value we subtract the average of the activation curve in the past 12 and
+    # future 6 seconds, to compensate for long-term trends. We end up with a
+    # list of boundary candidates along with strength values that can be
+    # thresholded at will. We found that more elaborate peak picking methods
+    # did not improve results.
+    preds_out = np.zeros((len(preds)))
+
+    for i in range(len(preds)):
+        pred_time = beat_times[i]
+        in_window = (beat_times > pred_time - 6) & (beat_times <= pred_time + 6)
+        max_in_window = np.argmax(np.where(in_window, preds, 0))
+        if i == max_in_window:
+            in_avg_window = (beat_times > pred_time - 12) & (beat_times <= pred_time + 6)
+            window_avg = np.mean(preds[in_avg_window])
+            preds_out[i] = preds[i] - window_avg
+        else:
+            preds_out[i] = 0
+
+    return np.flatnonzero(preds_out > parameters.prediction_threshold)
+
+
 def post_processing(preds_track):
     """
     Post processing of prediction probabilities, applies smoothing
@@ -46,21 +74,23 @@ def post_processing(preds_track):
     :return: post-processed predictions
     """
-    # smoothing
     preds_track = np.convolve(preds_track, np.hamming(4) / np.sum(np.hamming(4)), 'same')
     # emphasize peaks
-    preds_track = np.multiply(preds_track,
-                              np.convolve(preds_track, np.hamming(32) / np.sum(np.hamming(32)), 'same'))
+    if len(preds_track) >= 32:
+        preds_track = np.multiply(preds_track,
+                                  np.convolve(preds_track, np.hamming(32) / np.sum(np.hamming(32)), 'same'))
+
     # unit maximum
     preds_track /= np.max(preds_track)
     return preds_track
 
-if __name__ == "__main__":
-
+def run_eval(f_measure_thresh):
     f_measures = []
     precisions = []
     recalls = []
@@ -69,39 +99,49 @@ def post_processing(preds_track):
     preds = np.reshape(preds, len(preds))
 
     for i, f in enumerate(test_files):
-
-        print("Evaluating {}".format(f))
-
         # load annotations
         segment_times = get_segment_times(f, paths.annotations_path)
 
         # get beat times
-        beat_times = get_beat_times(f, paths.beats_path)
+        beat_times, beat_numbers = get_beat_times(f, paths.beats_path, include_beat_numbers=True)
 
         # get predictions for current track
         preds_track = np.squeeze(np.asarray(preds[test_idx == i]))
 
-        # post processing
-        preds_track = post_processing(preds_track)
-        peak_loc = peakutils.indexes(preds_track, min_dist=8, thres=0.1)
+        if len(preds_track) == 0:
+            continue
 
-        pred_times = beat_times[peak_loc] - 1
+        pred_indexes = choose_preds(preds_track, beat_times)
+        pred_times = beat_times[pred_indexes]
 
         # compute f-measure
-        f_score, p, r =
mir_eval.onset.f_measure(segment_times, pred_times, window=f_measure_thresh) + f_score, p, r = mir_eval.onset.f_measure(np.sort(segment_times), np.sort(pred_times), window=f_measure_thresh) f_measures.append(f_score) precisions.append(p) recalls.append(r) - print("f-Measure: {}, precision: {}, recall: {}".format(f_score, p, r)) - mean_f = np.mean(np.asarray(f_measures)) mean_p = np.mean(np.asarray(precisions)) mean_r = np.mean(np.asarray(recalls)) - print(" ") - print("Mean scores across all test tracks:") - print("f-Measure: {}, precision: {}, recall: {}".format(mean_f, mean_p, mean_r)) + print("mean f-Measure for {}: {}, precision: {}, recall: {}".format(f_measure_thresh, mean_f, mean_p, mean_r)) + return list(zip(test_files, f_measures, precisions, recalls)) + +def get_sort_key(item): + return item[1] + +if __name__ == "__main__": + run_eval(0.2) + short = run_eval(0.5) + long = run_eval(3.0) + + for i in range(len(short)): + short[i] += long[i][1:4] + + sorted_tracks = sorted(short, key=get_sort_key) + print("{:<20}{:4}\t{:4}\t{:4}\t{:4}\t{:4}\t{:4}".format("filename", "f0.5", "p0.5", "r0.5", "f3", "p3", "r3")) + for track in sorted_tracks: + print("{:<20}{:4.2}\t{:4.2}\t{:4.2}\t{:4.2}\t{:4.2}\t{:4.2}".format(*track)) diff --git a/Python/feature_extraction.py b/Python/feature_extraction.py index 3155de8..503b711 100644 --- a/Python/feature_extraction.py +++ b/Python/feature_extraction.py @@ -20,50 +20,139 @@ import random import pickle import paths +import warnings +import time +import pdb import multiprocessing, logging +from contextlib import contextmanager from utils import * import scipy +import skimage.measure +from scipy.spatial import distance -context_length = 65 # how many beats make up a context window for the CNN -num_mel_bands = 80 # number of Mel bands -neg_frames_factor = 5 # how many more negative examples than segment boundaries -pos_frames_oversample = 5 # oversample positive frames because there are too few -mid_frames_oversample = 3 # oversample frames between segments -label_smearing = 1 # how many frames are positive examples around an annotation +from parameters import * + +# for debugging +if False: + do_async = False + max_tracks = 1 +else: + do_async = True + max_tracks = None random.seed(1234) # for reproducibility np.random.seed(1234) +def debug_signal_handler(signal, frame): + pdb.set_trace() + +def compute_sslm(input_vector, beat_times, hop_size): + # stack (bag?) 
two frames
+    m = 2
+    x = [np.roll(input_vector, n, axis=1) for n in range(m)]
+    x_hat = np.concatenate(x, axis=0)
+
+    x_hat_length = x_hat.shape[1]
+
+    sslm_shape = sslm_length * 3  # because we'll max pool it down at the end
+
+    # Cosine distance calculation: D[N/p,L/p] matrix
+    distances = np.full((x_hat_length, sslm_shape), 1.0, dtype=np.float32)  # D has dimensions N/p x L/p
+    for i in range(x_hat_length):
+        for l in range(sslm_shape):
+            # note that negative indices here make our matrix 'time-circular'
+            cosine_dist = distance.cosine(x_hat[:,i], x_hat[:,i-(l+1)])  # cosine distance between columns i and i-L
+            distances[i,l] = cosine_dist
+
+    # Threshold epsilon[N/p,L/p] calculation
+    kappa = 0.1  # equalization factor of 10%
+
+    epsilon_buf = np.empty((sslm_shape, sslm_shape * 2), dtype=np.float32)
+    epsilon = np.empty((distances.shape[0], sslm_shape), dtype=np.float32)
+
+    for i in range(distances.shape[0]):
+        for l in range(sslm_shape):
+            epsilon_buf[l] = np.concatenate((distances[i-(l+1),:], distances[i,:]))
+
+        epsilon[i] = np.quantile(epsilon_buf, kappa, axis=1)
+        for l in range(sslm_shape):
+            if epsilon[i, l] == 0:
+                epsilon[i,l] = 1e-9
+
+
+    sslm = scipy.special.expit(1 - distances/epsilon)  # sigmoid
+    sslm = np.transpose(sslm)
-def compute_beat_mls(filename, beat_times, mel_bands=num_mel_bands, fft_size=1024, hop_size=512):
+    beat_frames = np.round(beat_times * (22050. / hop_size)).astype('int')
+    beat_sslms = np.zeros((sslm_length, sslm_length, beat_frames.shape[0]), dtype=np.float32)
+
+    for k in range(beat_frames.shape[0]):
+        sslm_frame = beat_frames[k] // max_pool
+        sslm_frame_min = sslm_frame - sslm_shape // 2
+        sslm_frame_max = sslm_frame + sslm_shape // 2 + 1
+        beat_sslm = np.take(sslm, range(sslm_frame_min, sslm_frame_max), mode='wrap', axis=1)
+        beat_sslms[:,:,k] = skimage.measure.block_reduce(beat_sslm, (3,3), np.max)
+
+    return beat_sslms
+
+
+def compute_mls_sslm(waveform, beat_times, mel_bands=num_mel_bands, fft_size=1024, hop_size=512):
     """
-    Compute average Mel log spectrogram per beat given previously
-    extracted beat times.
+    Compute self-similarity lag matrix (SSLM) using mel-log spectrogram as input
 
-    :param filename: path to audio file
+    :param waveform: raw waveform data
     :param beat_times: list of beat times in seconds
     :param mel_bands: number of Mel bands
     :param fft_size: FFT size
     :param hop_size: hop size for FFT processing
-    :return: beat Mel spectrogram (mel_bands x frames)
+    :return: beat sslm
     """
+    spec = np.abs(librosa.stft(y=waveform, n_fft=fft_size, hop_length=hop_size, win_length=fft_size,
+                               window=scipy.signal.hamming))
-    computed_mls_file = paths.get_mls_path(filename)
+    mel_fb = librosa.filters.mel(sr=22050, n_fft=fft_size, n_mels=mel_bands, fmin=50, fmax=10000, htk=True)
+    s = np.sum(mel_fb, axis=1)
+    mel_fb = np.divide(mel_fb, s[:, np.newaxis])
-    if os.path.exists(computed_mls_file):
-        return np.load(computed_mls_file)
+    mel_spec = np.dot(mel_fb, spec)
+    S_to_dB = librosa.power_to_db(mel_spec, ref=np.max)
-    if "/" in filename:
-        path = filename
-    else:
-        path = os.path.join(paths.audio_path, filename)
+    # first max-pooling: by 2.
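+    # pooling is along time only, by parameters.max_pool (2 by default);
+    # a second 3x3 max-pooling happens later in compute_sslm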
+    x_prime = skimage.measure.block_reduce(S_to_dB, (1,max_pool), np.max)
+
+    MFCCs = scipy.fftpack.dct(x_prime, axis=0, type=2, norm='ortho')
+    MFCCs = MFCCs[1:,:] + 1
+
+    return compute_sslm(MFCCs, beat_times, hop_size)
+
+def compute_chroma_sslm(waveform, beat_times, mel_bands=num_mel_bands, fft_size=1024, hop_size=512):
+    spec = librosa.stft(y=waveform, n_fft=fft_size, hop_length=hop_size, win_length=fft_size, window=scipy.signal.hamming)
+    spec = np.abs(spec)
+    x_prime = skimage.measure.block_reduce(spec, (1,max_pool), np.max)
-    y, sr = librosa.load(path, sr=22050, mono=True)
+    chroma_fb = librosa.filters.chroma(22050, fft_size, n_chroma=12)
+    chromagram = np.dot(chroma_fb, x_prime)
+    chromagram = librosa.power_to_db(chromagram, ref=np.max)
-    spec = np.abs(librosa.stft(y=y, n_fft=fft_size, hop_length=hop_size, win_length=fft_size,
+    return compute_sslm(chromagram + 1, beat_times, hop_size)
+
+def compute_beat_mls(waveform, beat_times, mel_bands=num_mel_bands, fft_size=1024, hop_size=512):
+    """
+    Compute average Mel log spectrogram per beat given previously
+    extracted beat times.
+
+    :param waveform: raw waveform data (22050 Hz mono)
+    :param beat_times: list of beat times in seconds
+    :param mel_bands: number of Mel bands
+    :param fft_size: FFT size
+    :param hop_size: hop size for FFT processing
+    :return: beat Mel spectrogram (mel_bands x frames)
+    """
+    spec = np.abs(librosa.stft(y=waveform, n_fft=fft_size, hop_length=hop_size, win_length=fft_size,
                                window=scipy.signal.hamming))
     mel_fb = librosa.filters.mel(sr=22050, n_fft=fft_size, n_mels=mel_bands, fmin=50, fmax=10000, htk=True)
@@ -83,20 +172,71 @@ def compute_beat_mls(filename, beat_times, mel_bands=num_mel_bands, fft_size=102
     beat_melspec = np.column_stack((beat_melspec, mel_spec[:, beat_frames.shape[0]]))
 
-    np.save(computed_mls_file, beat_melspec)
-
-    beat_melspec = np.column_stack((beat_melspec, mel_spec[:, beat_frames.shape[0]]))
     return beat_melspec
 
+def compute_time_features(waveform, beat_times):
+    length = len(waveform) / 22050.
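+    # one-hot encode each beat's relative position in the track, quantized into 500 bins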
+ time_ratios = np.zeros((len(beat_times), 500), dtype=np.float32) -def compute_features(logger, f, i, audio_files): - logger.info("Track {} / {} ({})".format(i, len(audio_files), f)) + for k in range(len(beat_times)): + time_ratios[k, int((beat_times[k] * 500) // length)] = 1.0 + + return time_ratios + + +def load_waveform(filename): + if "/" in filename: + path = filename + else: + path = os.path.join(paths.audio_path, filename) + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + y, sr = librosa.load(path, sr=22050, mono=True) + return y + +def with_audio_cache(filename, ext, waveform, beat_times, genf): + path = paths.get_audio_cache_path(filename, ext) + + if os.path.exists(path): + return np.load(path, mmap_mode='r'), waveform + else: + if waveform is None: + waveform = load_waveform(filename) + + data = genf(waveform, beat_times) + np.save(path, data) + return data, waveform + +def make_beat_time_features(beat_numbers): + times = np.zeros((len(beat_numbers), 4)) + for i in range(len(beat_numbers)): + times[i][beat_numbers[i] - 1] = 1 + return times + +def compute_features(f): + beat_times, beat_numbers = get_beat_times(os.path.join(paths.audio_path, f), paths.beats_path, include_beat_numbers=True) + + def gen_beat_mls(waveform, beat_times): + beat_mls = compute_beat_mls(waveform, beat_times) + beat_mls /= np.max(beat_mls) + return beat_mls + + waveform = None + beat_mls, waveform = with_audio_cache(f, '.mls_115.npy', waveform, beat_times, gen_beat_mls) + beat_mls_sslm, waveform = with_audio_cache(f, '.mls_sslm_115.npy', waveform, beat_times, compute_mls_sslm) + #times, waveform = with_audio_cache(f, '.beat_time_ratios.npy', waveform, beat_times, compute_time_features) + times = make_beat_time_features(beat_numbers) + + #chroma_sslm, waveform = with_audio_cache(f, '.chroma_sslm.npy', waveform, beat_times, compute_chroma_sslm) + + #beat_sslm = np.stack((beat_mls_sslm, chroma_sslm), axis=3) - beat_times = get_beat_times(os.path.join(paths.audio_path, f), paths.beats_path) + return beat_mls, beat_mls_sslm, times, beat_times - beat_mls = compute_beat_mls(f, beat_times) - beat_mls /= np.max(beat_mls) - return beat_mls, beat_times +def compute_features_async(logger, f, i, audio_files): + logger.info("Track {} / {} ({})".format(i, len(audio_files), f)) + compute_features(f) def batch_extract_mls_and_labels(audio_files, beats_folder, annotation_folder): """ @@ -111,20 +251,39 @@ def batch_extract_mls_and_labels(audio_files, beats_folder, annotation_folder): """ feature_list = [] + sslm_feature_list = [] + time_feature_list = [] labels_list = [] failed_tracks_idx = [] + async_res = [] logger = multiprocessing.log_to_stderr() logger.setLevel(logging.INFO) + n_tracks = 0 with multiprocessing.Pool(processes=8) as pool: - for i, f in enumerate(audio_files): - async_res.append(pool.apply_async(compute_features, (logger, f, i, audio_files, ))) + if do_async: + for i, f in enumerate(audio_files): + async_res.append(pool.apply_async(compute_features_async, (logger, f, i, audio_files, ))) for i, f in enumerate(audio_files): - beat_mls, beat_times = async_res[i].get() + if do_async: + try: + # have child process actually write features to disk + async_res[i].get() + + # now reload them in mmap + beat_mls, beat_sslm, time_features, beat_times = compute_features(f) + except Exception as inst: + print("error processing {}".format(f)) + print(inst) + failed_tracks_idx.append(i) + continue + else: + beat_mls, beat_sslm, time_features, beat_times = compute_features(f) + label_vec = 
np.zeros(beat_mls.shape[1],) segment_times = get_segment_times(f, paths.annotations_path) @@ -139,9 +298,15 @@ def batch_extract_mls_and_labels(audio_files, beats_folder, annotation_folder): label_vec[closest_beat] = 1. feature_list.append(beat_mls) + sslm_feature_list.append(beat_sslm) + time_feature_list.append(time_features) labels_list.append(label_vec) - return feature_list, labels_list, failed_tracks_idx + if max_tracks is not None and n_tracks > max_tracks: + break + n_tracks += 1 + + return feature_list, sslm_feature_list, time_feature_list, labels_list, failed_tracks_idx def normalize_features_per_band(features, mean_vec=None, std_vec=None, subsample=10000): @@ -157,6 +322,7 @@ def normalize_features_per_band(features, mean_vec=None, std_vec=None, subsample if mean_vec is None: # subsample features + print("sampling") idx = random.sample(range(features.shape[0]), min(features.shape[0], subsample)) temp_features = features[idx, :, :] @@ -171,13 +337,14 @@ def normalize_features_per_band(features, mean_vec=None, std_vec=None, subsample mean_vec = np.mean(temp_features, axis=0) std_vec = np.std(temp_features, axis=0) - features = features - mean_vec[np.newaxis, :, np.newaxis] - features = features / std_vec[np.newaxis, :, np.newaxis] + print("modifying...") + features -= mean_vec[np.newaxis, :, np.newaxis] + features /= std_vec[np.newaxis, :, np.newaxis] return features, mean_vec, std_vec -def prepare_batch_data(feature_list, labels_list, is_training=True): +def prepare_batch_data(feature_list, sslm_feature_list, time_feature_list, labels_list, is_training=True): """ Reads precomputed beat Mel spectrograms and slices them into context windows for CNN training. For the training set, subsampling is @@ -189,20 +356,20 @@ def prepare_batch_data(feature_list, labels_list, is_training=True): :return: batch data in the form (n_items, n_melbands, n_context) """ - n_preallocate = 250000 + n_preallocate = 500000 # initialize arrays for storing context windows data_x = np.zeros(shape=(n_preallocate, num_mel_bands, context_length), dtype=np.float32) + data_sslm_x = np.zeros(shape=(n_preallocate, sslm_length, sslm_length), dtype=np.float32) + data_time_x = np.zeros(shape=(n_preallocate, time_feature_list[0].shape[1]), dtype=np.float32) data_y = np.zeros(shape=(n_preallocate,), dtype=np.float32) data_weight = np.zeros(shape=(n_preallocate,), dtype=np.float32) track_idx = np.zeros(shape=(n_preallocate,), dtype=int) feature_count = 0 current_track = 0 - padding_length = int(context_length / 2) - - for features, labels in zip(feature_list, labels_list): + for features, sslm_features, time_features, labels in zip(feature_list, sslm_feature_list, time_feature_list, labels_list): print("Processed {} examples from {} tracks".format(feature_count, current_track+1)) num_beats = features.shape[1] @@ -212,6 +379,17 @@ def prepare_batch_data(feature_list, labels_list, is_training=True): labels = np.concatenate((np.zeros(padding_length), labels, np.zeros(padding_length)), axis=0) + def add_feature(idx, label, weight=1): + nonlocal feature_count + data_x[feature_count, :, :] = features[:, idx - padding_length: idx + padding_length + 1] + data_sslm_x[feature_count] = sslm_features[:, :, idx - padding_length] + data_time_x[feature_count] = time_features[idx - padding_length] + data_y[feature_count] = label + data_weight[feature_count] = weight + track_idx[feature_count] = current_track + + feature_count += 1 + if is_training is True: # take all positive frames. these are indexes into the already padded features. 
@@ -220,34 +398,15 @@ def prepare_batch_data(feature_list, labels_list, is_training=True): for rep in range(pos_frames_oversample): for k in positive_frames_idx: - - next_window = features[:, k - padding_length: k + padding_length + 1] - next_label = 1 - next_weight = 1 - - data_x[feature_count, :, :] = next_window - data_y[feature_count] = next_label - data_weight[feature_count] = next_weight - track_idx[feature_count] = current_track - - feature_count += 1 + add_feature(k, label=1) # apply label smearing: set labels around annotation to 1 and give them a triangular weight for l in range(k - label_smearing, k + label_smearing + 1): # don't smear into padding. if padding_length <= l < num_beats + padding_length and l != k: - - next_window = features[:, l-padding_length: l+padding_length+1] - next_label = 1 next_weight = 1. - np.abs(l-k) / (label_smearing + 1.) - - data_x[feature_count, :, :] = next_window - data_y[feature_count] = next_label - data_weight[feature_count] = next_weight - track_idx[feature_count] = current_track - - feature_count += 1 + add_feature(l, label=0.5, weight=next_weight) # take all frames in the middle between two boundaries (typical false positives) mid_segment_frames_idx = (positive_frames_idx[1:] + positive_frames_idx[:-1]) / 2 @@ -259,15 +418,7 @@ def prepare_batch_data(feature_list, labels_list, is_training=True): for l in range(k - label_smearing, k + label_smearing + 1): if padding_length <= l < num_beats + padding_length: - - next_window = features[:, l-padding_length: l+padding_length+1] - - data_x[feature_count, :, :] = next_window - data_y[feature_count] = 0 - data_weight[feature_count] = 1 - track_idx[feature_count] = current_track - - feature_count += 1 + add_feature(l, label=0) # sample randomly from the remaining frames remaining_frames_idx = [] @@ -280,42 +431,25 @@ def prepare_batch_data(feature_list, labels_list, is_training=True): for k in range(num_neg_frames): next_idx = random.sample(remaining_frames_idx, 1)[0] - next_window = features[:, next_idx-padding_length: next_idx+padding_length+1] - next_label = 0 - next_weight = 1 - - data_x[feature_count, :, :] = next_window - data_y[feature_count] = next_label - data_weight[feature_count] = next_weight - track_idx[feature_count] = current_track - - feature_count += 1 + add_feature(next_idx, label=0) else: # test data -> extract all context windows and keep track of track indices for k in range(padding_length, num_beats + padding_length): - - next_window = features[:, k-padding_length: k+padding_length+1] - next_label = labels[k] - next_weight = 1 - - data_x[feature_count, :, :] = next_window - data_y[feature_count] = next_label - data_weight[feature_count] = next_weight - track_idx[feature_count] = current_track - - feature_count += 1 + add_feature(k, label=labels[k]) current_track += 1 if feature_count > n_preallocate: break - data_x = data_x[:feature_count, :, :] + data_x.resize((feature_count, data_x.shape[1], data_x.shape[2])) + data_sslm_x.resize((feature_count, data_sslm_x.shape[1], data_sslm_x.shape[2])) + data_time_x.resize((feature_count, data_time_x.shape[1])) data_y = data_y[:feature_count] data_weight = data_weight[:feature_count] track_idx = track_idx[:feature_count] - return data_x, data_y, data_weight, track_idx + return data_x, data_sslm_x, data_time_x, data_y, data_weight, track_idx def load_raw_features(file): @@ -333,6 +467,8 @@ def load_raw_features(file): if __name__ == "__main__": + #import signal + #signal.signal(signal.SIGINT, debug_signal_handler) train_frame = 
pd.read_csv('../Data/train_tracks.txt', header=None) test_frame = pd.read_csv('../Data/test_tracks.txt', header=None) @@ -342,13 +478,13 @@ def load_raw_features(file): print("Extracting MLS features") - train_features, train_labels, train_failed_idx = batch_extract_mls_and_labels(train_files, - paths.beats_path, - paths.annotations_path) + train_features, train_sslm_features, train_time_features, train_labels, train_failed_idx = batch_extract_mls_and_labels(train_files, + paths.beats_path, + paths.annotations_path) - test_features, test_labels, test_failed_idx = batch_extract_mls_and_labels(test_files, - paths.beats_path, - paths.annotations_path) + test_features, test_sslm_features, test_time_features, test_labels, test_failed_idx = batch_extract_mls_and_labels(test_files, + paths.beats_path, + paths.annotations_path) print("Extracted features for {} training and {} test tracks".format(len(train_features), len(test_features))) @@ -360,21 +496,22 @@ def load_raw_features(file): del test_files[i] with open('../Data/rawFeatures.pickle', 'wb') as f: - pickle.dump((train_features, train_labels, test_features, test_labels), f) + pickle.dump((train_features, train_sslm_features, train_labels, test_features, test_sslm_features, test_labels), f) # train_features, train_labels, test_features, test_labels = load_raw_features('../Data/rawFeatures.pickle') - train_x, train_y, train_weights, train_idx = prepare_batch_data(train_features, train_labels, is_training=True) - test_x, test_y, test_weights, test_idx = prepare_batch_data(test_features, test_labels, is_training=False) + train_x, train_sslm_x, train_time_x, train_y, train_weights, train_idx = prepare_batch_data(train_features, train_sslm_features, train_time_features, train_labels, is_training=True) + test_x, test_sslm_x, test_time_x, test_y, test_weights, test_idx = prepare_batch_data(test_features, test_sslm_features, test_time_features, test_labels, is_training=False) + print("normalizing features") train_x, mean_vec, std_vec = normalize_features_per_band(train_x) test_x, mean_vec, std_vec = normalize_features_per_band(test_x, mean_vec, std_vec) print("Prepared {} training items and {} test items".format(train_x.shape[0], test_x.shape[0])) # store normalized features for CNN training - np.savez('../Data/trainDataNormalized.npz', train_x=train_x, train_y=train_y, train_weights=train_weights) - np.savez('../Data/testDataNormalized.npz', test_x=test_x, test_y=test_y, test_weights=test_weights) + np.savez('../Data/trainDataNormalized.npz', train_x=train_x, train_sslm_x=train_sslm_x, train_time_x=train_time_x, train_y=train_y, train_weights=train_weights) + np.savez('../Data/testDataNormalized.npz', test_x=test_x, test_sslm_x=test_sslm_x, test_time_x=test_time_x, test_y=test_y, test_weights=test_weights) np.savez('../Data/normalization.npz', mean_vec=mean_vec, std_vec=std_vec) # store file lists and index mapping to training and test data diff --git a/Python/parameters.py b/Python/parameters.py new file mode 100644 index 0000000..b048dc7 --- /dev/null +++ b/Python/parameters.py @@ -0,0 +1,32 @@ +# thresholding value for prediction-choice algorithm. trade recall for accuracy here. +prediction_threshold = 0.3 + +# should we include (MLS, SSLM, beat #) features when training? 
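+# build_model() constructs only the input branches named here, so removing an entry ablates that feature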
+training_features = {'mls', 'sslm', 'beat_numbers'}
+
+# how many beats make up a context window for the MLS part of the network
+context_length = 115
+
+# number of Mel bands
+num_mel_bands = 80
+
+# how many frames to max-pool in building the SSLM
+max_pool = 2
+
+# how far back to calculate the SSLM (note that actual length will be max_pool * sslm_length)
+sslm_length = 115
+
+# how many more negative examples than segment boundaries
+neg_frames_factor = 5
+
+# oversample positive frames because there are too few
+pos_frames_oversample = 5
+
+# oversample frames between segments
+mid_frames_oversample = 3
+
+# how many frames are semi-positive examples around an annotation
+label_smearing = 1
+
+padding_length = int(context_length / 2)
+
diff --git a/Python/paths.py b/Python/paths.py
index 00ccd13..55856e4 100644
--- a/Python/paths.py
+++ b/Python/paths.py
@@ -17,9 +17,14 @@
 # where to find SALAMI annotations
 annotations_path = '../Data/salami-data-public/annotations/'
 
+viz_path = '../Audio/viz'
+
 def remove_suffix(filename):
     return os.path.splitext(os.path.basename(filename))[0]
 
-def get_mls_path(audio_filename):
-    return os.path.join(mls_path, remove_suffix(audio_filename) + '.mls.npy')
+def with_suffix(path, ext):
+    return remove_suffix(path) + '.' + ext
+
+def get_audio_cache_path(audio_filename, ext):
+    return os.path.join(mls_path, remove_suffix(audio_filename) + ext)
diff --git a/Python/track_segmentation.py b/Python/track_segmentation.py
index 8f4e538..ecce646 100644
--- a/Python/track_segmentation.py
+++ b/Python/track_segmentation.py
@@ -13,29 +13,33 @@
 import os, sys
 import numpy as np
 import pandas as pd
-from feature_extraction import compute_beat_mls, normalize_features_per_band
-from evaluation import post_processing
+from feature_extraction import compute_features, normalize_features_per_band
+from evaluation import post_processing, choose_preds
 from train_segmentation_cnn import build_model
-import peakutils
 
 normalization_path = '../Data/normalization.npz'
 model_weights = '../Data/model_weights_100epochs_lr005.h5'
 out_dir = '../Temp/'
 
-num_mel_bands = 80
-context_length = 65
-padding = int(context_length / 2)
+from parameters import context_length, num_mel_bands, padding_length, sslm_length
 
-def compute_cnn_predictions(features):
+
+def build_full_model():
+    model = build_model(num_mel_bands, context_length, sslm_length)
+    model.load_weights(model_weights)
+    model.compile(loss='binary_crossentropy', optimizer='sgd')
+    return model
+
+def compute_cnn_predictions(mls_features, sslm_features, time_features):
     """
     Apply pretrained CNN model to features and return predictions.
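+
+    Expects mls_features as (n_windows, num_mel_bands, context_length) context
+    windows, sslm_features as (sslm_length, sslm_length, n_beats) and
+    time_features as (n_beats, 4) beat-number one-hots.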
""" - model = build_model(num_mel_bands, context_length) - model.load_weights(model_weights) - model.compile(loss='binary_crossentropy', optimizer='sgd') + model = build_full_model() + + mls_features = np.expand_dims(mls_features, 3) + sslm_features = np.transpose(sslm_features, (2, 0, 1)) - features = np.expand_dims(features, 3) - predictions = model.predict(features, batch_size=1) + predictions = model.predict([mls_features, sslm_features, time_features], batch_size=1) return predictions @@ -49,10 +53,10 @@ def extract_features(audio_file, beats_file): """ t = pd.read_table(beats_file, header=None) - beat_times = t.iloc[:, 0].values + beat_times = t[0].values + beat_numbers = t[1].values - beat_mls = compute_beat_mls(filename=audio_file, beat_times=beat_times) - beat_mls /= np.max(beat_mls) + beat_mls, sslm, time_features, beat_times = compute_features(audio_file) features = compute_context_windows(beat_mls) norm_data = np.load(normalization_path) @@ -60,7 +64,7 @@ def extract_features(audio_file, beats_file): std_vec = norm_data['std_vec'] features, mean_vec, std_vec = normalize_features_per_band(features, mean_vec, std_vec) - return features, beat_times + return features, sslm, time_features, beat_times def compute_context_windows(features): @@ -73,21 +77,20 @@ def compute_context_windows(features): n_preallocate = 10000 - features = np.hstack((0.001 * np.random.rand(num_mel_bands, padding), features, - 0.001 * np.random.rand(num_mel_bands, padding))) + features = np.hstack((0.001 * np.random.rand(num_mel_bands, padding_length), features, + 0.001 * np.random.rand(num_mel_bands, padding_length))) # initialize arrays for storing context windows data_x = np.zeros(shape=(n_preallocate, num_mel_bands, context_length), dtype=np.float32) feature_count = 0 - num_beats = features.shape[1] - - for k in range(padding, num_beats-padding): + num_padded_features = features.shape[1] + for k in range(padding_length, num_padded_features - padding_length): if feature_count > n_preallocate: break - next_window = features[:, k-padding: k+padding+1] + next_window = features[:, k-padding_length: k+padding_length+1] data_x[feature_count, :, :] = next_window feature_count += 1 @@ -96,16 +99,28 @@ def compute_context_windows(features): return data_x +def print_predictions(p, beat_times): + for i in range(len(p)): + print("%i:\t%.3f\t%.1f" % (i, p[i], beat_times[i])) + + def compute_segments_from_predictions(predictions, beat_times): """ Computes the segment times from a prediction curve and the beat times using peak picking. 
""" predictions = np.squeeze(predictions) - predictions = post_processing(predictions) - peak_loc = peakutils.indexes(predictions, min_dist=8, thres=0.05) + + print("raw predicitions:") + print_predictions(predictions, beat_times) + peak_loc = choose_preds(predictions, beat_times) + segment_times = beat_times[peak_loc] + print("beat_num\ttime:") + for i in peak_loc: + print("%i\t%.2f" % (i, beat_times[i])) + return segment_times @@ -127,17 +142,21 @@ def compute_segments_from_predictions(predictions, beat_times): if not os.path.isfile(out_dir + file_name + '.beats.txt'): print("Extracting beat times (this might take a while)...") - os.system('DBNBeatTracker \'single\' "' + audio_file + '" -o "' + out_dir + file_name + '.beats.txt"') + os.system('DBNDownBeatTracker \'single\' "' + audio_file + '" -o "' + out_dir + file_name + '.beats.txt"') print("Computing features") - mls_features, beat_times = extract_features(audio_file, out_dir + file_name + '.beats.txt') + mls_features, sslm, time_features, beat_times = extract_features(audio_file, out_dir + file_name + '.beats.txt') print("Computing CNN predictions") - predictions = compute_cnn_predictions(mls_features) + predictions = compute_cnn_predictions(mls_features, sslm, time_features) print("Get segment times") segment_times = compute_segments_from_predictions(predictions, beat_times) + print("\n") + for f in segment_times: + print(f) + print("The result has been stored in " + output_file) np.savetxt(output_file, segment_times, fmt='%4.2f', delimiter='\n') diff --git a/Python/train_segmentation_cnn.py b/Python/train_segmentation_cnn.py index 1b81912..c2680e7 100644 --- a/Python/train_segmentation_cnn.py +++ b/Python/train_segmentation_cnn.py @@ -12,11 +12,20 @@ from keras.models import Sequential from keras.layers.core import Dense, Dropout, Activation, Flatten from keras.layers.convolutional import Convolution2D, MaxPooling2D +import tensorflow.keras.layers +from tensorflow.keras.models import Model + + +import tensorflow as tf +from tensorflow import keras +from tensorflow.keras import layers + from keras.callbacks import EarlyStopping from keras.optimizers import SGD -np.random.seed(1234) # for reproducibility +np.random.seed(1235) # for reproducibility +import parameters def load_training_data(dataset): """ @@ -30,12 +39,8 @@ def load_training_data(dataset): :return train_weights (n_items x 1) """ - data = np.load(dataset) - train_x = data['train_x'] - train_y = data['train_y'] - train_weights = data['train_weights'] - - return train_x, train_y, train_weights + data = np.load(dataset, mmap_mode='r') + return data['train_x'], data['train_sslm_x'], data['train_time_x'], data['train_y'], data['train_weights'] def load_test_data(dataset): @@ -50,34 +55,70 @@ def load_test_data(dataset): :return test_weights (n_items x 1) """ - data = np.load(dataset) - test_x = data['test_x'] - test_y = data['test_y'] - test_weights = data['test_weights'] + data = np.load(dataset, mmap_mode='r') + return data['test_x'], data['test_sslm_x'], data['test_time_x'], data['test_y'], data['test_weights'] + + +def build_model(mls_rows, mls_cols, sslm_shape): + inputs = [] + merged_input = [] + + if 'mls' in parameters.training_features: + mls_input = layers.Input(shape=(mls_rows, mls_cols, 1), name='mls_input') + mls = layers.Conv2D(16, (6, 8), activation='relu', name='mls_conv')(mls_input) + mls = layers.MaxPooling2D(pool_size=(3, 6), name='mls_maxpool')(mls) + merged_input.append(mls) + inputs.append(mls_input) + + if 'sslm' in parameters.training_features: + 
sslm_input = layers.Input(shape=(sslm_shape, sslm_shape, 1), name='sslm_input') + sslm = layers.Conv2D(16, (8, 8), activation='relu', name='sslm_conv')(sslm_input) + sslm = layers.MaxPooling2D(pool_size=(6, 6), name='sslm_maxpool')(sslm) + + merged_input.append(sslm) + inputs.append(sslm_input) + + if len(merged_input) > 1: + merged = layers.Concatenate(axis=1, name='mls_sslm_concat')(merged_input) + else: + merged = merged_input[0] + + merged = layers.Conv2D(64, (6, 3), activation='relu', name='concat_conv')(merged) + merged = layers.Dropout(0.5, name='concat_dropout')(merged) - return test_x, test_y, test_weights + merged = layers.Flatten()(merged) + merged = layers.Dense(256, activation='relu', name='final_dense')(merged) + merged = layers.Dropout(0.5, name='final_dropout')(merged) -def build_model(img_rows, img_cols): + final_dense_input = [merged] + if 'beat_numbers' in parameters.training_features: + time_input = layers.Input(shape=(4,), name='time_input') + time = layers.Dense(1, activation='relu', name='time_dense')(time_input) + final_dense_input.append(time) + inputs.append(time_input) - model = Sequential() + if len(final_dense_input) > 1: + merged = layers.Concatenate(name='final_concat')(final_dense_input) + else: + merged = final_dense_input[0] - model.add(Convolution2D(32, (6, 8), input_shape=(img_rows, img_cols, 1))) - model.add(Activation('relu')) - model.add(MaxPooling2D(pool_size=(5, 2))) - model.add(Convolution2D(64, (4, 6))) - model.add(Activation('relu')) - model.add(MaxPooling2D(pool_size=(2, 2))) - model.add(Dropout(0.5)) - model.add(Flatten()) - model.add(Dense(256)) - model.add(Activation('relu')) - model.add(Dropout(0.5)) - model.add(Dense(1)) - model.add(Activation('sigmoid')) + merged = layers.Dense(1, activation='sigmoid', name='final_sigmoid')(merged) - return model + return Model(inputs=inputs, outputs = merged) +def make_input(mls, sslm, time): + input = [] + if 'mls' in parameters.training_features: + input.append(mls) + + if 'sslm' in parameters.training_features: + input.append(sslm) + + if 'beat_numbers' in parameters.training_features: + input.append(time) + + return input def train_model(batch_size=128, nb_epoch=100, save_ext='_100epochs_lr005', weights_file=None): """ @@ -90,23 +131,27 @@ def train_model(batch_size=128, nb_epoch=100, save_ext='_100epochs_lr005', weigh """ print('loading training data...') - X_train, y_train, w_train = load_training_data('../Data/trainDataNormalized.npz') + X_train, x_sslm_train, x_time_train, y_train, w_train = load_training_data('../Data/trainDataNormalized.npz') print('training data size:') print(X_train.shape) + img_rows = X_train.shape[1] + img_cols = X_train.shape[2] + model = build_model(img_rows, img_cols, x_sslm_train.shape[1]) + p = np.random.permutation(X_train.shape[0]) + X_train = X_train[p, :, :] + x_sslm_train = x_sslm_train[p, :, :] + x_time_train = x_time_train[p] y_train = y_train[p] w_train = w_train[p] X_train = X_train.astype('float32') X_train = np.expand_dims(X_train, 3) + x_sslm_train = np.expand_dims(x_sslm_train, 3) - img_rows = X_train.shape[1] - img_cols = X_train.shape[2] - - model = build_model(img_rows, img_cols) if weights_file is not None: model.load_weights(weights_file) @@ -114,24 +159,26 @@ def train_model(batch_size=128, nb_epoch=100, save_ext='_100epochs_lr005', weigh sgd = SGD(lr=0.05, decay=1e-4, momentum=0.9, nesterov=True) model.compile(loss='binary_crossentropy', optimizer=sgd) - #early_stopping = EarlyStopping(monitor='val_loss', patience=5) + early_stopping = 
EarlyStopping(monitor='val_loss', patience=15) print('train model...') - model.fit(X_train, y_train, batch_size=batch_size, epochs=nb_epoch, shuffle=True, - verbose=1, validation_split=0.1, sample_weight=w_train, callbacks=[]) + + model.fit(x=make_input(X_train, x_sslm_train, x_time_train), y=y_train, batch_size=batch_size, epochs=nb_epoch, shuffle=True, + verbose=1, validation_split=0.1, sample_weight=w_train, callbacks=[early_stopping]) print('load test data...') - X_test, y_test, w_test = load_test_data('../Data/testDataNormalized.npz') + X_test, x_sslm_test, x_time_test, y_test, w_test = load_test_data('../Data/testDataNormalized.npz') X_test = X_test.astype('float32') X_test = np.expand_dims(X_test, 3) + x_sslm_test = np.expand_dims(x_sslm_test, 3) print('predict test data...') - preds = model.predict(X_test, batch_size=1, verbose=1) + preds = model.predict(make_input(X_test, x_sslm_test, x_time_test), batch_size=1, verbose=1) print('saving results...') np.save('../Data/predsTestTracks' + save_ext + '.npy', preds) - score = model.evaluate(X_test, y_test, verbose=1) + score = model.evaluate(make_input(X_test, x_sslm_test, x_time_test), y_test, verbose=1) print('Test score:', score) # save model @@ -139,4 +186,4 @@ def train_model(batch_size=128, nb_epoch=100, save_ext='_100epochs_lr005', weigh if __name__ == "__main__": - train_model() + train_model(nb_epoch=200) diff --git a/Python/utils.py b/Python/utils.py index a55b571..30a9145 100644 --- a/Python/utils.py +++ b/Python/utils.py @@ -89,22 +89,29 @@ def get_segment_times(audio_file, annotation_folder): # for some tracks, only one annotation is available, take first one as default # if there is no annotation available, store -1 as error code + try: - label_file = os.path.join(annotation_folder, file_name, 'parsed', 'textfile1_uppercase.txt') + label_file = os.path.join(annotation_folder, file_name, 'parsed', 'textfile3_uppercase.txt') t = pd.read_table(label_file, header=None) except IOError: try: - label_file = os.path.join(annotation_folder, file_name, 'parsed', 'textfile2_uppercase.txt') + label_file = os.path.join(annotation_folder, file_name, 'parsed', 'textfile1_uppercase.txt') t = pd.read_table(label_file, header=None) except IOError: - return -1 + try: + label_file = os.path.join(annotation_folder, file_name, 'parsed', 'textfile2_uppercase.txt') + t = pd.read_table(label_file, header=None) + except IOError: + return -1 + + if t[1].dtype == 'O': + t = t[~(t[1].str.lower().isin(['silence', 'end']))] segment_times = t.iloc[:, 0].values return segment_times - -def get_beat_times(audio_file, beats_folder): +def get_beat_times(audio_file, beats_folder, include_beat_numbers=False): """ Read beat times from annotation file. 
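+
+    If no beats file exists yet, one is generated on the fly with madmom's
+    DBNDownBeatTracker, which can take a while.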
:param audio_file: path to audio files @@ -114,7 +121,15 @@ def get_beat_times(audio_file, beats_folder): file_name = os.path.splitext(os.path.basename(audio_file))[0] beats_file = os.path.join(beats_folder, file_name + '.beats.txt') + + if not os.path.isfile(beats_file): + print(f"Extracting beat times for {audio_file}") + os.system(f"DBNDownBeatTracker single '{audio_file}' -o '{beats_file}'") + t = pd.read_table(beats_file, header=None) - beat_times = t.iloc[:, 0].values - return beat_times + if include_beat_numbers: + return t[0].values, t[1].values + else: + return t[0].values + diff --git a/Python/visualization.py b/Python/visualization.py index ea540d3..8259051 100644 --- a/Python/visualization.py +++ b/Python/visualization.py @@ -8,8 +8,11 @@ import numpy as np from feature_extraction import load_raw_features from evaluation import post_processing +from utils import get_beat_times import matplotlib.pyplot as plt import pickle +import paths +import os def visualize_predictions(): @@ -19,31 +22,36 @@ def visualize_predictions(): """ preds = np.load('../Data/predsTestTracks_100epochs_lr005.npy') - train_features, train_labels, test_features, test_labels = load_raw_features('../Data/rawFeatures.pickle') - data = np.load('../Data/testDataNormalized.npz') test_y = data['test_y'] # load file lists and indices with open('../Data/fileListsAndIndex.pickle', 'rb') as f: - train_files, train_idx, test_files, test_idx = pickle.load(f) - - for i in range(len(test_labels)): + train_files, train_idx, test_files, test_idx = pickle.load(f) + for i in range(len(test_files)): f = test_files[i] - print f + beat_times, beat_numbers = get_beat_times(f, paths.beats_path, include_beat_numbers=True) + print(f) idx = np.where(test_idx == i)[0] labels = test_y[idx] preds_track = np.squeeze(np.asarray(preds[idx])) - preds_track = post_processing(preds_track) + processed_preds_track = post_processing(preds_track, beat_numbers) + with_downbeat_preds = post_processing(preds_track, beat_numbers, emphasize_downbeat=True) + preds_track = 0.5 + 0.5 * preds_track + processed_preds_track = 1.0 + 0.5 * processed_preds_track + with_downbeat_preds = 1.5 + 0.5 * with_downbeat_preds labels *= 0.5 plt.plot(labels) plt.plot(preds_track) - plt.show() + plt.plot(processed_preds_track) + plt.plot(with_downbeat_preds) + plt.savefig(os.path.join(paths.viz_path, paths.with_suffix(test_files[i], 'svg')), dpi=400) + plt.clf() def visualize_training_data(): diff --git a/README.md b/README.md index 0701ca2..16eba4c 100644 --- a/README.md +++ b/README.md @@ -36,7 +36,7 @@ After that the beat tracking from the MADMOM library can be run on all files wit ```bash cd ./Audio mkdir beats -DBNBeatTracker batch -o ./beats $(ls *.mp3) +DBNDownBeatTracker batch -o ./beats $(ls *.mp3) ``` This will take quite some time and use a lot of memory. After finishing, the beat files (`*.beats.txt`) will be placed next to the audio files. 
diff --git a/add_files.sh b/add_files.sh
new file mode 100755
index 0000000..1216e78
--- /dev/null
+++ b/add_files.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+IN_LIST=/tmp/add_files.present
+CURRENT=/tmp/add_files.exist
+
+cat Data/test_tracks.txt Data/train_tracks.txt | sort > $IN_LIST
+(cd ~/src/salami-audio && ls -1 *.{mp3,m4a}) | sort > $CURRENT
+
+# -13 keeps only files that exist on disk but are not yet listed; plain -3 would
+# also re-append tracks that are listed but missing on disk, duplicating entries
+newfiles=`comm -13 $IN_LIST $CURRENT | sort -R`
+count=`comm -13 $IN_LIST $CURRENT | wc -l`
+
+i=0
+for x in $newfiles
+do
+    if [ "$i" -gt "$(($count / 9 - 1))" ]
+    then
+        echo "$x" to train_tracks
+        echo $x >> Data/train_tracks.txt
+    else
+        echo "$x" to test_tracks
+        echo $x >> Data/test_tracks.txt
+    fi
+    i=$(($i + 1))
+done
diff --git a/track_segment.sh b/track_segment.sh
new file mode 100755
index 0000000..9002756
--- /dev/null
+++ b/track_segment.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+cd Python
+mkdir -p ~/src/salami-data-public/annotations/$1/parsed
+python ./track_segmentation.py ~/src/salami-audio/$1.* ~/src/salami-data-public/annotations/$1/parsed/predicted.txt
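+
+# usage: ./track_segment.sh <salami_id>
+# e.g. "./track_segment.sh 955" segments ~/src/salami-audio/955.mp3 and writes the
+# predicted boundary times to ~/src/salami-data-public/annotations/955/parsed/predicted.txt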