Skip to content

Commit

Permalink
Merge pull request #87 from kiwix/fds_archive
Browse files Browse the repository at this point in the history
Allow `zim::Archive` to be created from a set of file descriptors.
  • Loading branch information
kelson42 authored Apr 22, 2024
2 parents 9c52584 + d2ace70 commit 6109a7a
Show file tree
Hide file tree
Showing 6 changed files with 194 additions and 16 deletions.
3 changes: 2 additions & 1 deletion lib/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,8 @@ String getLibzimFiles() {
"${projectDir}/src/main/java/org/kiwix/libzim/SuggestionSearcher.java " +
"${projectDir}/src/main/java/org/kiwix/libzim/SuggestionSearch.java " +
"${projectDir}/src/main/java/org/kiwix/libzim/ZimFileFormatException.java " +
"${projectDir}/src/main/java/org/kiwix/libzim/EntryNotFoundException.java"
"${projectDir}/src/main/java/org/kiwix/libzim/EntryNotFoundException.java " +
"${projectDir}/src/main/java/org/kiwix/libzim/FdInput.java"
}

task buildLinuxBinding(type: Exec) {
Expand Down
71 changes: 58 additions & 13 deletions lib/src/main/cpp/libzim/archive.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,23 @@ int jni2fd(const jobject& fdObj, JNIEnv* env)
return env->GetIntField(fdObj, field_fd);
}

// Converts a Java `org.kiwix.libzim.FdInput` object into a native `zim::FdInput`.
//
// Reads the `fd` (java.io.FileDescriptor), `offset` (long) and `size` (long)
// fields of `fdInputObj` and forwards them to the `zim::FdInput` constructor.
//
// @param fdInputObj  the Java FdInput instance to read from.
// @param env         the JNI environment of the calling thread.
// @return            a `zim::FdInput` built from (fd, offset, size).
zim::FdInput jni2fdInput(const jobject& fdInputObj, JNIEnv* env)
{
  jclass class_fdesc = env->FindClass("org/kiwix/libzim/FdInput");

  jfieldID field_id = env->GetFieldID(class_fdesc, "fd", "Ljava/io/FileDescriptor;");
  jobject fdObj = env->GetObjectField(fdInputObj, field_id);
  const int fd = jni2fd(fdObj, env);

  // A Java `long` is always 64 bits (`jlong`), whereas a C++ `long` is only
  // 32 bits on ILP32 ABIs (e.g. 32-bit Android). Keeping the values as `jlong`
  // avoids truncating offsets/sizes of 2 GiB and more.
  field_id = env->GetFieldID(class_fdesc, "offset", "J");
  const jlong offset = env->GetLongField(fdInputObj, field_id);

  field_id = env->GetFieldID(class_fdesc, "size", "J");
  const jlong size = env->GetLongField(fdInputObj, field_id);

  // This helper is called once per element when converting an FdInput[]:
  // release the local references created here so they do not pile up in the
  // JNI local reference table for the duration of the native call.
  env->DeleteLocalRef(fdObj);
  env->DeleteLocalRef(class_fdesc);

  return zim::FdInput(fd, offset, size);
}

} // unnamed namespace

JNIEXPORT void JNICALL Java_org_kiwix_libzim_Archive_setNativeArchiveByFD(
Expand All @@ -71,13 +88,8 @@ JNIEXPORT void JNICALL Java_org_kiwix_libzim_Archive_setNativeArchiveByFD(
int fd = jni2fd(fdObj, env);

LOG("Attempting to create reader with fd: %d", fd);
try {
auto archive = std::make_shared<zim::Archive>(fd);
SET_PTR(archive);
} catch (std::exception& e) {
LOG("Error opening ZIM file");
LOG("%s", e.what());
}
auto archive = std::make_shared<zim::Archive>(fd);
SET_PTR(archive);
#else
jclass exception = env->FindClass("java/lang/UnsupportedOperationException");
env->ThrowNew(exception, "org.kiwix.libzim.Archive.setNativeArchiveByFD() is not supported under Windows");
Expand All @@ -91,13 +103,46 @@ JNIEXPORT void JNICALL Java_org_kiwix_libzim_Archive_setNativeArchiveEmbedded(
int fd = jni2fd(fdObj, env);

LOG("Attempting to create reader with fd: %d", fd);
try {
auto archive = std::make_shared<zim::Archive>(fd, offset, size);
SET_PTR(archive);
} catch (std::exception& e) {
LOG("Error opening ZIM file");
LOG("%s", e.what());
auto archive = std::make_shared<zim::Archive>(fd, offset, size);
SET_PTR(archive);
#else
jclass exception = env->FindClass("java/lang/UnsupportedOperationException");
env->ThrowNew(exception, "org.kiwix.libzim.Archive.setNativeArchiveEmbedded() is not supported under Windows");
#endif
} CATCH_EXCEPTION()

// JNI implementation of `org.kiwix.libzim.Archive.setNativeArchiveEmbeddedFd(FdInput)`.
//
// Converts the Java FdInput `fdObj` to a `zim::FdInput`, creates a
// `zim::Archive` from it and hands the shared pointer to SET_PTR (macro defined
// elsewhere; presumably it attaches the native archive to `thisObj`).
// On Windows the operation is not available and a Java
// `UnsupportedOperationException` is thrown instead.
// C++ exceptions leaving the body are handled by CATCH_EXCEPTION() (defined
// elsewhere; presumably it rethrows them as Java exceptions).
JNIEXPORT void JNICALL Java_org_kiwix_libzim_Archive_setNativeArchiveEmbeddedFd(
    JNIEnv* env, jobject thisObj, jobject fdObj) try
{
#ifndef _WIN32
  auto fdInput = jni2fdInput(fdObj, env);

  auto archive = std::make_shared<zim::Archive>(fdInput);
  SET_PTR(archive);
#else
  jclass exception = env->FindClass("java/lang/UnsupportedOperationException");
  // Name the method that was actually called (the message used to be a
  // copy-paste of the one in setNativeArchiveEmbedded()).
  env->ThrowNew(exception, "org.kiwix.libzim.Archive.setNativeArchiveEmbeddedFd() is not supported under Windows");
#endif
} CATCH_EXCEPTION()


JNIEXPORT void JNICALL Java_org_kiwix_libzim_Archive_setNativeArchiveEmbeddedFds(
JNIEnv* env, jobject thisObj, jobjectArray fdsObj) try
{
#ifndef _WIN32

jsize length = env->GetArrayLength(fdsObj);
std::vector<zim::FdInput> v;

int i;
for(i = 0; i<length; i++) {
jobject fdObj = env->GetObjectArrayElement(fdsObj, i);
auto fdInput = jni2fdInput(fdObj, env);
v.push_back(fdInput);
}

auto archive = std::make_shared<zim::Archive>(v);
SET_PTR(archive);
#else
jclass exception = env->FindClass("java/lang/UnsupportedOperationException");
env->ThrowNew(exception, "org.kiwix.libzim.Archive.setNativeArchiveEmbedded() is not supported under Windows");
Expand Down
15 changes: 15 additions & 0 deletions lib/src/main/java/org/kiwix/libzim/Archive.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.kiwix.libzim.Entry;
import org.kiwix.libzim.Item;
import org.kiwix.libzim.EntryIterator;
import org.kiwix.libzim.FdInput;
import java.io.FileDescriptor;

public class Archive
Expand All @@ -44,6 +45,18 @@ public Archive(FileDescriptor fd, long offset, long size)
setNativeArchiveEmbedded(fd, offset, size);
}

/**
 * Creates an archive from a single {@link FdInput}.
 *
 * The actual work is delegated to the native method
 * {@code setNativeArchiveEmbeddedFd}.
 *
 * @param fd the file descriptor, offset and size describing the archive data.
 * @throws ZimFileFormatException declared by this constructor; raised by the
 *         native layer (presumably when the data is not a valid ZIM archive).
 */
public Archive(FdInput fd) throws ZimFileFormatException
{
    this.setNativeArchiveEmbeddedFd(fd);
}

/**
 * Creates an archive from several {@link FdInput} parts.
 *
 * The actual work is delegated to the native method
 * {@code setNativeArchiveEmbeddedFds}.
 *
 * @param fds the parts (file descriptor, offset, size) making up the archive,
 *            in order.
 * @throws ZimFileFormatException declared by this constructor; raised by the
 *         native layer (presumably when the data is not a valid ZIM archive).
 */
public Archive(FdInput[] fds) throws ZimFileFormatException
{
    this.setNativeArchiveEmbeddedFds(fds);
}

public native String getFilename();
public native long getFilesize();
public native int getAllEntryCount();
Expand Down Expand Up @@ -94,6 +107,8 @@ public Archive(FileDescriptor fd, long offset, long size)
private native void setNativeArchive(String filename);
private native void setNativeArchiveByFD(FileDescriptor fd);
private native void setNativeArchiveEmbedded(FileDescriptor fd, long offset, long size);
private native void setNativeArchiveEmbeddedFd(FdInput fd);
private native void setNativeArchiveEmbeddedFds(FdInput[] fds);

@Override
protected void finalize() { dispose(); }
Expand Down
35 changes: 35 additions & 0 deletions lib/src/main/java/org/kiwix/libzim/FdInput.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Copyright (C) 2017 Matthieu Gautier <[email protected]>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*/

package org.kiwix.libzim;

import java.io.FileDescriptor;

public class FdInput
{
public FileDescriptor fd;
public long offset;
public long size;

public FdInput(FileDescriptor fd_, long offset_, long size_) {
fd = fd_;
offset = offset_;
size = size_;
}
}
12 changes: 12 additions & 0 deletions lib/src/test/org/kiwix/test/libzim/TestArchive.java
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,18 @@ public TestArchive(FileDescriptor fd, long offset, long size)
inner = new Archive(fd, offset, size);
}

/**
 * Wraps a new {@link Archive} built from a single {@link FdInput}.
 *
 * @param fd passed unchanged to the {@code Archive(FdInput)} constructor.
 * @throws ZimFileFormatException propagated from the {@code Archive} constructor.
 */
public TestArchive(FdInput fd) throws ZimFileFormatException
{
    this.inner = new Archive(fd);
}

/**
 * Wraps a new {@link Archive} built from several {@link FdInput} parts.
 *
 * @param fds passed unchanged to the {@code Archive(FdInput[])} constructor.
 * @throws ZimFileFormatException propagated from the {@code Archive} constructor.
 */
public TestArchive(FdInput[] fds) throws ZimFileFormatException
{
    this.inner = new Archive(fds);
}

public String getFilename() { return inner.getFilename(); }
public long getFilesize() { return inner.getFilesize(); }
public int getAllEntryCount() { return inner.getAllEntryCount(); }
Expand Down
74 changes: 72 additions & 2 deletions lib/src/test/test.java
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,9 @@ private void testArchive(TestArchive archive)
assertTrue(Arrays.equals(faviconData, readData));

// Checking all metadata
assertFalse(archive.isMultiPart());
assertTrue(archive.hasNewNamespaceScheme());
assertTrue(archive.hasChecksum());
assertEquals("4a2709fddbee8c27db708c20b4952a06", archive.getChecksum());
assertTrue(archive.hasTitleIndex());
assertTrue(archive.hasFulltextIndex());
assertTrue(archive.hasMainEntry());
long[] illuSizes = {48};
Expand Down Expand Up @@ -221,6 +219,8 @@ public void testArchiveDirect()
{
TestArchive archive = new TestArchive("small.zim");
testArchive(archive);
assertFalse(archive.isMultiPart());
assertTrue(archive.hasTitleIndex());
assertTrue(archive.check());
assertEquals("small.zim", archive.getFilename());
}
Expand Down Expand Up @@ -261,6 +261,26 @@ public void testArchiveByFd()
FileInputStream fis = new FileInputStream("small.zim");
TestArchive archive = new TestArchive(fis.getFD());
testArchive(archive);
assertFalse(archive.isMultiPart());
assertTrue(archive.hasTitleIndex());
assertTrue(archive.check());
assertEquals("", archive.getFilename());
}
System.gc();
System.runFinalization();
}

@Test
public void testArchiveByFdInput()
throws JNIKiwixException, IOException, ZimFileFormatException, EntryNotFoundException {
{
File plainArchive = new File("small.zim");
FileInputStream fis = new FileInputStream("small.zim");
FdInput fd = new FdInput(fis.getFD(), 0, plainArchive.length());
TestArchive archive = new TestArchive(fd);
testArchive(archive);
assertFalse(archive.isMultiPart());
assertTrue(archive.hasTitleIndex());
assertTrue(archive.check());
assertEquals("", archive.getFilename());
}
Expand All @@ -278,6 +298,56 @@ public void testArchiveWithAnEmbeddedArchive()
// This fails. See https://github.com/openzim/libzim/issues/812
//assertTrue(archive.check());
testArchive(archive);
assertFalse(archive.isMultiPart());
assertTrue(archive.hasTitleIndex());
assertEquals("", archive.getFilename());
}
System.gc();
System.runFinalization();
}

/**
 * Opens the archive embedded in "small.zim.embedded" as two parts, splitting
 * it naively in the middle, and checks that it is reported as multi-part and
 * without a title index.
 */
@Test
public void testArchiveWithAnEmbeddedArchiveFdInputNaive()
        throws JNIKiwixException, IOException, ZimFileFormatException, EntryNotFoundException {
    // The archive lives in its own scope so that it is unreachable by the time
    // the garbage collector and the finalizers are run below.
    {
        File reference = new File("small.zim");
        FileInputStream embeddedStream = new FileInputStream("small.zim.embedded");

        // First part: starts at offset 8 and covers the first half of the archive.
        FdInput firstHalf = new FdInput(embeddedStream.getFD(), 8, reference.length() / 2);
        // Second part: starts right after the first one and covers the rest.
        FdInput secondHalf = new FdInput(
                embeddedStream.getFD(),
                firstHalf.offset + firstHalf.size,
                reference.length() - firstHalf.size);

        FdInput[] parts = {firstHalf, secondHalf};
        TestArchive archive = new TestArchive(parts);

        // This fails. See https://github.com/openzim/libzim/issues/812
        //assertTrue(archive.check());
        testArchive(archive);
        assertTrue(archive.isMultiPart());
        // The naive split cuts the title index in the middle, so libzim cannot read it.
        assertFalse(archive.hasTitleIndex());
        assertEquals("", archive.getFilename());
    }
    System.gc();
    System.runFinalization();
}

@Test
public void testArchiveWithAnEmbeddedArchiveFdInput()
throws JNIKiwixException, IOException, ZimFileFormatException, EntryNotFoundException {
{
File plainArchive = new File("small.zim");
FileInputStream fis = new FileInputStream("small.zim.embedded");
FdInput fd1 = new FdInput(fis.getFD(), 8, plainArchive.length() / 10);
FdInput fd2 = new FdInput(fis.getFD(), fd1.offset + fd1.size, plainArchive.length() - fd1.size);

FdInput fds[] = {fd1, fd2};

TestArchive archive = new TestArchive(fds);
// This fails. See https://github.com/openzim/libzim/issues/812
//assertTrue(archive.check());
testArchive(archive);
assertTrue(archive.isMultiPart());
//If we don't cut in the middle of xapian db, we can read it.
assertTrue(archive.hasTitleIndex());
assertEquals("", archive.getFilename());
}
System.gc();
Expand Down

0 comments on commit 6109a7a

Please sign in to comment.