Skip to content

Commit

Permalink
Utilize strtok3 random access reading
Browse files Browse the repository at this point in the history
  • Loading branch information
Borewit committed Nov 26, 2024
1 parent 33341a2 commit 137e639
Show file tree
Hide file tree
Showing 26 changed files with 205 additions and 236 deletions.
10 changes: 9 additions & 1 deletion lib/ParserFactory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import initDebug from 'debug';
import { type INativeMetadataCollector, MetadataCollector } from './common/MetadataCollector.js';

import type { IAudioMetadata, IOptions, ParserType } from './type.js';
import type { ITokenizer } from 'strtok3';
import type { IRandomAccessTokenizer, ITokenizer } from 'strtok3';
import { mpegParserLoader } from './mpeg/MpegLoader.js';
import { CouldNotDetermineFileTypeError, UnsupportedFileTypeError } from './ParseError.js';
import { apeParserLoader } from './apev2/Apev2Loader.js';
Expand All @@ -22,6 +22,7 @@ import { oggParserLoader } from './ogg/OggLoader.js';
import { wavpackParserLoader } from './wavpack/WavPackLoader.js';
import { riffParserLoader } from './wav/WaveLoader.js';
import { amrParserLoader } from './amr/AmrLoader.js';
import { scanAppendingHeaders } from './core.js';

const debug = initDebug('music-metadata:parser:factory');

Expand Down Expand Up @@ -93,6 +94,13 @@ export class ParserFactory {

async parse(tokenizer: ITokenizer, parserLoader: IParserLoader | undefined, opts?: IOptions): Promise<IAudioMetadata> {

if (tokenizer.supportsRandomAccess()) {
debug('tokenizer supports random-access, scanning for appending headers');
await scanAppendingHeaders(tokenizer as IRandomAccessTokenizer, opts);
} else {
debug('tokenizer does not support random-access, cannot scan for appending headers');
}

if (!parserLoader) {
const buf = new Uint8Array(4100);
if (tokenizer.fileInfo.mimeType) {
Expand Down
11 changes: 7 additions & 4 deletions lib/apev2/APEv2Parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import { StringType } from 'token-types';
import { uint8ArrayToString } from 'uint8array-extras';

import * as util from '../common/Util.js';
import type { IOptions, IRandomReader, IApeHeader } from '../type.js';
import type { IOptions, IApeHeader } from '../type.js';
import type { INativeMetadataCollector } from '../common/MetadataCollector.js';
import { BasicParser } from '../common/BasicParser.js';
import {
Expand All @@ -18,6 +18,7 @@ import {
TagItemHeader
} from './APEv2Token.js';
import { makeUnexpectedFileContentError } from '../ParseError.js';
import type { IRandomAccessTokenizer } from 'strtok3';

const debug = initDebug('music-metadata:parser:APEv2');

Expand Down Expand Up @@ -54,13 +55,15 @@ export class APEv2Parser extends BasicParser {

/**
* Calculates the APEv1 / APEv2 first field offset
* @param reader
* @param tokenizer
* @param offset
*/
public static async findApeFooterOffset(reader: IRandomReader, offset: number): Promise<IApeHeader | undefined> {
public static async findApeFooterOffset(tokenizer: IRandomAccessTokenizer, offset: number): Promise<IApeHeader | undefined> {
// Search for APE footer header at the end of the file
const apeBuf = new Uint8Array(TagFooter.len);
await reader.randomRead(apeBuf, 0, TagFooter.len, offset - TagFooter.len);
const position = tokenizer.position;
await tokenizer.readBuffer(apeBuf, {position: offset - TagFooter.len});
tokenizer.setPosition(position);
const tagFooter = TagFooter.get(apeBuf, 0);
if (tagFooter.ID === 'APETAGEX') {
if (tagFooter.flags.isHeader) {
Expand Down
34 changes: 0 additions & 34 deletions lib/common/RandomFileReader.ts

This file was deleted.

26 changes: 0 additions & 26 deletions lib/common/RandomUint8ArrayReader.ts

This file was deleted.

18 changes: 7 additions & 11 deletions lib/core.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@
* Primary entry point, Node.js specific entry point is index.ts
*/

import {type AnyWebByteStream, type IFileInfo, type ITokenizer, fromWebStream, fromBuffer} from 'strtok3';
import { type AnyWebByteStream, type IFileInfo, type ITokenizer, fromWebStream, fromBuffer, type IRandomAccessTokenizer } from 'strtok3';

import { ParserFactory } from './ParserFactory.js';
import { RandomUint8ArrayReader } from './common/RandomUint8ArrayReader.js';
import { APEv2Parser } from './apev2/APEv2Parser.js';
import { hasID3v1Header } from './id3v1/ID3v1Parser.js';
import { getLyricsHeaderLength } from './lyrics3/Lyrics3.js';

import type { IAudioMetadata, INativeTagDict, IOptions, IPicture, IPrivateOptions, IRandomReader, ITag } from './type.js';
import type { IAudioMetadata, INativeTagDict, IOptions, IPicture, IPrivateOptions, ITag } from './type.js';

export type { IFileInfo } from 'strtok3';

Expand Down Expand Up @@ -54,9 +53,6 @@ export function parseWebStream(webStream: AnyWebByteStream, fileInfo?: IFileInfo
*/
export async function parseBuffer(uint8Array: Uint8Array, fileInfo?: IFileInfo | string, options: IOptions = {}): Promise<IAudioMetadata> {

const bufferReader = new RandomUint8ArrayReader(uint8Array);
await scanAppendingHeaders(bufferReader, options);

const tokenizer = fromBuffer(uint8Array, {fileInfo: typeof fileInfo === 'string' ? {mimeType: fileInfo} : fileInfo});
return parseFromTokenizer(tokenizer, options);
}
Expand Down Expand Up @@ -112,16 +108,16 @@ export function selectCover(pictures?: IPicture[]): IPicture | null {
}) : null;
}

export async function scanAppendingHeaders(randomReader: IRandomReader, options: IPrivateOptions = {}) {
export async function scanAppendingHeaders(tokenizer: IRandomAccessTokenizer, options: IPrivateOptions = {}) {

let apeOffset = randomReader.fileSize;
if (await hasID3v1Header(randomReader)) {
let apeOffset = tokenizer.fileInfo.size;
if (await hasID3v1Header(tokenizer)) {
apeOffset -= 128;
const lyricsLen = await getLyricsHeaderLength(randomReader);
const lyricsLen = await getLyricsHeaderLength(tokenizer);
apeOffset -= lyricsLen;
}

options.apeHeader = await APEv2Parser.findApeFooterOffset(randomReader, apeOffset);
options.apeHeader = await APEv2Parser.findApeFooterOffset(tokenizer, apeOffset);
}

export declare function loadMusicMetadata(): Promise<typeof import('music-metadata')>;
13 changes: 7 additions & 6 deletions lib/id3v1/ID3v1Parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ import { StringType, UINT8 } from 'token-types';

import * as util from '../common/Util.js';

import type { IGetToken, ITokenizer } from 'strtok3';
import type { IGetToken, IRandomAccessTokenizer, ITokenizer } from 'strtok3';
import { BasicParser } from '../common/BasicParser.js';
import { APEv2Parser } from '../apev2/APEv2Parser.js';
import type { AnyTagValue, IApeHeader, IPrivateOptions, IRandomReader } from '../type.js';
import type { AnyTagValue, IApeHeader, IPrivateOptions } from '../type.js';
import type { INativeMetadataCollector } from '../common/MetadataCollector.js';

const debug = initDebug('music-metadata:parser:ID3v1');
Expand Down Expand Up @@ -160,13 +160,14 @@ export class ID3v1Parser extends BasicParser {
private async addTag(id: string, value: AnyTagValue): Promise<void> {
await this.metadata.addTag('ID3v1', id, value);
}

}

export async function hasID3v1Header(reader: IRandomReader): Promise<boolean> {
if (reader.fileSize >= 128) {
export async function hasID3v1Header(tokenizer: IRandomAccessTokenizer): Promise<boolean> {
if (tokenizer.fileInfo.size >= 128) {
const tag = new Uint8Array(3);
await reader.randomRead(tag, 0, tag.length, reader.fileSize - 128);
const position = tokenizer.position;
await tokenizer.readBuffer(tag, {position: tokenizer.fileInfo.size - 128});
tokenizer.setPosition(position); // Restore tokenizer position
return new TextDecoder('latin1').decode(tag) === 'TAG';
}
return false;
Expand Down
10 changes: 1 addition & 9 deletions lib/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,9 @@ import type { Readable } from 'node:stream';
import { fromFile, fromStream, type IFileInfo } from 'strtok3';
import initDebug from 'debug';

import { parseFromTokenizer, scanAppendingHeaders } from './core.js';
import { parseFromTokenizer, } from './core.js';
import { ParserFactory } from './ParserFactory.js';
import type { IAudioMetadata, IOptions } from './type.js';
import { RandomFileReader } from './common/RandomFileReader.js';

export * from './core.js';

Expand Down Expand Up @@ -39,13 +38,6 @@ export async function parseFile(filePath: string, options: IOptions = {}): Promi

const fileTokenizer = await fromFile(filePath);

const fileReader = await RandomFileReader.init(filePath, fileTokenizer.fileInfo.size as number);
try {
await scanAppendingHeaders(fileReader, options);
} finally {
await fileReader.close();
}

const parserFactory = new ParserFactory();

try {
Expand Down
11 changes: 7 additions & 4 deletions lib/lyrics3/Lyrics3.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import type { IRandomReader } from '../type.js';
import type {IRandomAccessTokenizer} from 'strtok3';

export const endTag2 = 'LYRICS200';

export async function getLyricsHeaderLength(reader: IRandomReader): Promise<number> {
if (reader.fileSize >= 143) {
export async function getLyricsHeaderLength(tokenizer: IRandomAccessTokenizer): Promise<number> {
const fileSize = tokenizer.fileInfo.size;
if (fileSize >= 143) {
const buf = new Uint8Array(15);
await reader.randomRead(buf, 0, buf.length, reader.fileSize - 143);
const position = tokenizer.position;
await tokenizer.readBuffer(buf, {position: fileSize - 143});
tokenizer.setPosition(position); // Restore position
const txt = new TextDecoder('latin1').decode(buf);
const tag = txt.slice(6);
if (tag === endTag2) {
Expand Down
22 changes: 0 additions & 22 deletions lib/type.ts
Original file line number Diff line number Diff line change
Expand Up @@ -694,28 +694,6 @@ export interface IMetadataEvent {

export type Observer = (update: IMetadataEvent) => void;

/**
* Provides random data read access
* Used for read operations on files or buffers
*/
export interface IRandomReader {

/**
* Total length of file or buffer
*/
fileSize: number;

/**
* Read from a given position of an abstracted file or buffer.
* @param {Uint8Array} buffer the buffer that the data will be written to.
* @param {number} offset the offset in the buffer to start writing at.
* @param {number} length an integer specifying the number of bytes to read.
* @param {number} position an argument specifying where to begin reading from in the file.
* @return {Promise<number>} bytes read
*/
randomRead(buffer: Uint8Array, offset: number, length: number, position: number): Promise<number>;
}

export interface ILyricsText {
text: string;
timestamp?: number;
Expand Down
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,9 @@
"content-type": "^1.0.5",
"debug": "^4.3.7",
"file-type": "^19.6.0",
"link": "^2.1.1",
"media-typer": "^1.1.0",
"strtok3": "^9.0.1",
"strtok3": "^9.1.1",
"token-types": "^6.0.0",
"uint8array-extras": "^1.4.0"
},
Expand Down
35 changes: 24 additions & 11 deletions test/metadata-parsers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import fs from 'node:fs';
import * as mm from '../lib/index.js';
import type { IAudioMetadata, IOptions } from '../lib/index.js';

type ParseFileMethod = (skipTest: () => void, filePath: string, mimeType?: string, options?: IOptions) => Promise<IAudioMetadata>;
type ParseFileMethod = (skipTest: () => void, filePath: string, mimeType?: string, options?: IOptions) => Promise<{metadata: IAudioMetadata, randomRead: boolean}>;

interface IParser {
description: string;
Expand All @@ -18,33 +18,46 @@ const [nodeMajorVersion] = process.versions.node.split('.').map(Number);
export const Parsers: IParser[] = [
{
description: 'parseFile',
initParser: (skipTest, filePath: string, mimeType?: string, options?: IOptions) => {
return mm.parseFile(filePath, options);
initParser: async (skipTest, filePath: string, mimeType?: string, options?: IOptions) => {
return {
metadata: await mm.parseFile(filePath, options),
randomRead: true
};
}
}, {
description: 'parseStream (Node.js)',
initParser: (skipTest, filePath: string, mimeType?: string, options?: IOptions) => {
initParser: async (skipTest, filePath: string, mimeType?: string, options?: IOptions) => {
const stream = fs.createReadStream(filePath);
return mm.parseStream(stream, {mimeType}, options).then(metadata => {
try {
return {
metadata: await mm.parseStream(stream, {mimeType: mimeType}, options),
randomRead: false
};
} finally {
stream.close();
return metadata;
});
}
}
}, {
description: 'parseBlob',
initParser: (skipTest, filePath: string, mimeType?: string, options?: IOptions) => {
initParser: async (skipTest, filePath: string, mimeType?: string, options?: IOptions) => {
if (nodeMajorVersion < 20) {
skipTest();
}
const buffer = fs.readFileSync(filePath);
return mm.parseBlob(new Blob([buffer], {type: mimeType}), options);
return {
metadata: await mm.parseBlob(new Blob([buffer], {type: mimeType}), options),
randomRead: false
};
}
}, {
description: 'parseBuffer',
initParser: (skipTest, filePath: string, mimeType?: string, options?: IOptions) => {
initParser: async(skipTest, filePath: string, mimeType?: string, options?: IOptions) => {
const buffer = fs.readFileSync(filePath);
const array = new Uint8Array(buffer);
return mm.parseBuffer(array, {mimeType}, options);
return {
metadata: await mm.parseBuffer(array, {mimeType}, options),
randomRead: true
};
}
}
];
4 changes: 2 additions & 2 deletions test/test-file-aac.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ describe('Parse ADTS/AAC', () => {
if (parser.description === 'parseBlob') {
this.skip(); // ToDo: fix different behaviour parseFromWebStream()
}
const metadata = await parser.initParser(() => this.skip(), path.join(aacSamplePath, 'adts-mpeg4.aac'), 'audio/aac', {
const {metadata} = await parser.initParser(() => this.skip(), path.join(aacSamplePath, 'adts-mpeg4.aac'), 'audio/aac', {
duration: true
});
checkFormat(metadata.format, 'ADTS/MPEG-4', 'AAC', 'AAC LC', 16000, 1, 20399, 256000);
Expand All @@ -41,7 +41,7 @@ describe('Parse ADTS/AAC', () => {

Parsers.forEach(parser => {
it(parser.description, async function(){
const metadata = await parser.initParser(() => this.skip(), path.join(aacSamplePath, 'adts-mpeg4-2.aac'), 'audio/aac', {
const {metadata} = await parser.initParser(() => this.skip(), path.join(aacSamplePath, 'adts-mpeg4-2.aac'), 'audio/aac', {
duration: true
});
checkFormat(metadata.format, 'ADTS/MPEG-4', 'AAC', 'AAC LC', 44100, 2, 128000, 14336);
Expand Down
Loading

0 comments on commit 137e639

Please sign in to comment.