diff --git a/lib/handlers/get.js b/lib/handlers/get.js index f4aed8eeb..effe681d6 100644 --- a/lib/handlers/get.js +++ b/lib/handlers/get.js @@ -128,6 +128,9 @@ async function handler (req, res, next) { res.send(data) return next() } catch (err) { + if (err.message === '404') { + return next(error(404, 'HTML do not contain data island')) + } debug('error translating: ' + req.originalUrl + ' ' + contentType + ' -> ' + possibleRDFType + ' -- ' + 500 + ' ' + err.message) return next(error(500, 'Error translating between RDF formats')) } diff --git a/lib/handlers/patch.js b/lib/handlers/patch.js index bda388393..d2385e112 100644 --- a/lib/handlers/patch.js +++ b/lib/handlers/patch.js @@ -8,7 +8,7 @@ const debug = require('../debug').handlers const error = require('../http-error') const $rdf = require('rdflib') const crypto = require('crypto') -const { overQuota, getContentType } = require('../utils') +const { overQuota, getContentType, HTMLDataIsland } = require('../utils') const withLock = require('../lock') // Patch parsers by request body content type @@ -18,6 +18,8 @@ const PATCH_PARSERS = { 'text/n3': require('./patch/n3-patch-parser.js') } +const dataIsland = {} + // use media-type as contentType for new RDF resource const DEFAULT_FOR_NEW_CONTENT_TYPE = 'text/turtle' @@ -116,16 +118,33 @@ function readGraph (resource) { } } debug('PATCH -- Read target file (%d bytes)', fileContents.length) + + // HTML data Island + let dataScript = '' + if (resource.contentType.includes('html')) { + [dataScript, dataIsland.contentType] = HTMLDataIsland(fileContents) + // default HTML data island + if (!dataIsland.contentType) { + dataIsland.contentType = DEFAULT_FOR_NEW_CONTENT_TYPE + } + dataIsland.fileContents = fileContents.replace(dataScript, '') + // remove data island from HTML + fileContents = dataScript.replace(/^/gms, '').replace(/<\/script>$/gms, '') + } + fileContents = resource.contentType.includes('json') ? JSON.parse(fileContents) : fileContents resolve(fileContents) }) ) - // Parse the resource's file contents + // Parse the resource's file contents .then((fileContents) => { + console.log('file1 ' + fileContents) const graph = $rdf.graph() debug('PATCH -- Reading %s with content type %s', resource.url, resource.contentType) try { - $rdf.parse(fileContents, graph, resource.url, resource.contentType) + const contentType = dataIsland.contentType || resource.contentType // alain + console.log(contentType) + $rdf.parse(fileContents, graph, resource.url, contentType) } catch (err) { throw error(500, `Patch: Target ${resource.contentType} file syntax error: ${err}`) } @@ -213,13 +232,34 @@ function writeGraph (graph, resource, root, serverUri) { }).catch(() => reject(error(500, 'Error finding user quota'))) } - if (resource.contentType === 'application/ld+json') { - $rdf.serialize(resourceSym, graph, resource.url, resource.contentType, function (err, result) { + function addDataIsland (result) { + const scriptResult = `` + const regex = `/(.*?)/gms` + const script = dataIsland.fileContents.match(regex) + let data = '' + console.log('alain script' + script) + if (script) data = dataIsland.fileContents.replace(script, scriptResult) + else { + if (dataIsland.fileContents.includes('')) { + data = dataIsland.fileContents.split('').join(scriptResult + '\n') + } else { + data = dataIsland.fileContents.split('').join('\n' + scriptResult + '\n\n') + } + } + console.log('alain ' + data) + return data + } + + const contentType = dataIsland.contentType || resource.contentType + if (contentType === 'application/ld+json') { + $rdf.serialize(resourceSym, graph, resource.url, contentType, function (err, result) { if (err) return reject(error(500, `Failed to serialize after patch: ${err}`)) + if (dataIsland && dataIsland.contentType) result = addDataIsland(result) doWrite(result) }) } else { - const serialized = $rdf.serialize(resourceSym, graph, resource.url, resource.contentType) + let serialized = $rdf.serialize(resourceSym, graph, resource.url, contentType) + if (dataIsland && dataIsland.contentType) serialized = addDataIsland(serialized) doWrite(serialized) } }) diff --git a/lib/utils.js b/lib/utils.js index 0e16193e1..1038b455d 100644 --- a/lib/utils.js +++ b/lib/utils.js @@ -13,6 +13,7 @@ module.exports.getQuota = getQuota module.exports.overQuota = overQuota module.exports.getContentType = getContentType module.exports.parse = parse +module.exports.HTMLDataIsland = HTMLDataIsland const fs = require('fs') const path = require('path') @@ -128,6 +129,25 @@ function serialize (graph, baseUri, contentType) { }) } +function HTMLDataIsland (data) { + let from = '' + let dataScript = '' + const scripts = data.split('' + const RDFType = ['text/turtle', 'text/n3', 'application/ld+json', 'application/rdf+xml'] + const contentType = RDFType.find(type => script.includes(`type="${type}"`)) + if (contentType) { + dataScript = script // .replace(/^/gms, '').replace(/<\/script>$/gms, '') + from = contentType + break + } + } + } + return [dataScript, from] +} + function translate (stream, baseUri, from, to) { return new Promise((resolve, reject) => { let data = '' @@ -136,6 +156,17 @@ function translate (stream, baseUri, from, to) { data += chunk }) .on('end', function () { + // check for HTML data island + let dataScript = '' + if (from === 'text/html') { + [dataScript, from] = HTMLDataIsland(data) + data = dataScript.replace(/^/gms, '').replace(/<\/script>$/gms, '') + if (!from) { + reject(new Error(404, 'data island do not exist')) + } + } + if (from === 'text/html') return resolve(data) + // parse 'from', serialize 'to' const graph = $rdf.graph() $rdf.parse(data, graph, baseUri, from, function (err) { if (err) return reject(err) @@ -174,7 +205,7 @@ function stringToStream (string) { function stripLineEndings (obj) { if (!obj) { return obj } - return obj.replace(/(\r\n|\n|\r)/gm, '') + return obj.replace(/(\r\n|\n|\r)/gms, '') } /** diff --git a/test/integration/formats-test.js b/test/integration/formats-test.js index 83dd61525..4b7f98656 100644 --- a/test/integration/formats-test.js +++ b/test/integration/formats-test.js @@ -16,6 +16,26 @@ describe('formats', function () { .expect(/Hello, world!/) .expect(200, done) }) + describe('HTML dataIsland', function () { + it('should return 404 if Accept is set to only text/turtle and no dataIsland', function (done) { + server.get('/hello.html') + .set('accept', 'text/turtle') + .expect(404, done) + }) + it('should return text/turtle if dataIsland and Accept is set to only text/turtle', function (done) { + server.get('/hello-with-data-island.html') + .set('accept', 'text/turtle') + .expect('Content-type', /text\/turtle/) + .expect(/<> a "test"./) + .expect(200, done) + }) + it('should return JSON-LD if dataIsland and Accept is set to only application/ld+json', function (done) { + server.get('/hello-with-data-island.html') + .set('accept', 'application/ld+json') + .expect('Content-type', 'application/ld+json; charset=utf-8') + .expect(200, done) + }) + }) }) describe('JSON-LD', function () { diff --git a/test/integration/patch-test.js b/test/integration/patch-test.js index e9fba6523..5beb7aa82 100644 --- a/test/integration/patch-test.js +++ b/test/integration/patch-test.js @@ -460,6 +460,63 @@ describe('PATCH', () => { }) }) + describe('HTML data island', () => { + describe('on a non-existent HTML dataIsland', describePatch({ + path: '/data-island-new.html', + // exists: false, + patch: `<> a solid:InsertDeletePatch; + solid:inserts { . }.` + }, { // expected: + status: 200, + text: 'Patch applied successfully', + result: `\r + + + +Hello, world!\r +\r +` + })) + + describe('with a matching WHERE clause', describePatch({ + path: '/data-island.html', + patch: `<> a solid:InsertDeletePatch; + solid:where { ?a . }; + solid:inserts { ?a . }; + solid:deletes { ?a . }.` + }, { // expected: + status: 200, + text: 'Patch applied successfully', + result: ` + + + + +Hello, world! + +` + })) + + describe('with a non-matching WHERE clause', describePatch({ + path: '/data-island.html', + patch: `<> a solid:InsertDeletePatch; + solid:where { ?a . }; + solid:inserts { ?a . }; + solid:deletes { ?a . }.` + }, { // expected: + status: 409, + text: 'The patch could not be applied' + })) + }) + // Creates a PATCH test for the given resource with the given expected outcomes function describePatch ({ path, exists = true, patch, contentType = 'text/n3' }, { status = 200, text, result }) { diff --git a/test/resources/hello-with-data-island.html b/test/resources/hello-with-data-island.html new file mode 100644 index 000000000..8d20401a5 --- /dev/null +++ b/test/resources/hello-with-data-island.html @@ -0,0 +1,5 @@ + + +Hello, world! + + diff --git a/test/resources/patch/data-island-new.html b/test/resources/patch/data-island-new.html new file mode 100644 index 000000000..d7e7321c4 --- /dev/null +++ b/test/resources/patch/data-island-new.html @@ -0,0 +1,3 @@ + +Hello, world! + diff --git a/test/resources/patch/data-island.html b/test/resources/patch/data-island.html new file mode 100644 index 000000000..9297a86ac --- /dev/null +++ b/test/resources/patch/data-island.html @@ -0,0 +1,9 @@ + + + + +Hello, world! +