Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GET/PATCH for HTML data island in NSS #1715

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions lib/handlers/get.js
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,9 @@ async function handler (req, res, next) {
res.send(data)
return next()
} catch (err) {
if (err.message === '404') {
return next(error(404, 'HTML do not contain data island'))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
return next(error(404, 'HTML do not contain data island'))
return next(error(404, 'HTML does not contain any data islands'))

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The plural in islands seems to allow multiples. Actually my PR only allows one.
Not sure that more than one could be expected. The id="data" cannot be used more than once in an html document.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There may be multiple islands in a single HTML doc, though perhaps not (yet) in your environment. My error text is equivalent to saying, "I found no data island in this HTML," among other phrasings.

Another phrasing suggestion might be, HTML does not contain a data island.

}
debug('error translating: ' + req.originalUrl + ' ' + contentType + ' -> ' + possibleRDFType + ' -- ' + 500 + ' ' + err.message)
return next(error(500, 'Error translating between RDF formats'))
}
Expand Down
52 changes: 46 additions & 6 deletions lib/handlers/patch.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ const debug = require('../debug').handlers
const error = require('../http-error')
const $rdf = require('rdflib')
const crypto = require('crypto')
const { overQuota, getContentType } = require('../utils')
const { overQuota, getContentType, HTMLDataIsland } = require('../utils')
const withLock = require('../lock')

// Patch parsers by request body content type
Expand All @@ -18,6 +18,8 @@ const PATCH_PARSERS = {
'text/n3': require('./patch/n3-patch-parser.js')
}

const dataIsland = {}

// use media-type as contentType for new RDF resource
const DEFAULT_FOR_NEW_CONTENT_TYPE = 'text/turtle'

Expand Down Expand Up @@ -116,16 +118,33 @@ function readGraph (resource) {
}
}
debug('PATCH -- Read target file (%d bytes)', fileContents.length)

// HTML data Island
let dataScript = ''
if (resource.contentType.includes('html')) {
[dataScript, dataIsland.contentType] = HTMLDataIsland(fileContents)
// default HTML data island
if (!dataIsland.contentType) {
dataIsland.contentType = DEFAULT_FOR_NEW_CONTENT_TYPE
}
dataIsland.fileContents = fileContents.replace(dataScript, '')
// remove data island from HTML
fileContents = dataScript.replace(/^<script(.*?)>/gms, '').replace(/<\/script>$/gms, '')
}

fileContents = resource.contentType.includes('json') ? JSON.parse(fileContents) : fileContents
resolve(fileContents)
})
)
// Parse the resource's file contents
// Parse the resource's file contents
.then((fileContents) => {
console.log('file1 ' + fileContents)
const graph = $rdf.graph()
debug('PATCH -- Reading %s with content type %s', resource.url, resource.contentType)
try {
$rdf.parse(fileContents, graph, resource.url, resource.contentType)
const contentType = dataIsland.contentType || resource.contentType // alain
console.log(contentType)
$rdf.parse(fileContents, graph, resource.url, contentType)
} catch (err) {
throw error(500, `Patch: Target ${resource.contentType} file syntax error: ${err}`)
}
Expand Down Expand Up @@ -213,13 +232,34 @@ function writeGraph (graph, resource, root, serverUri) {
}).catch(() => reject(error(500, 'Error finding user quota')))
}

if (resource.contentType === 'application/ld+json') {
$rdf.serialize(resourceSym, graph, resource.url, resource.contentType, function (err, result) {
/**
 * Embeds freshly serialized RDF back into the HTML document as its data island.
 *
 * Reads the module-level `dataIsland` object: `dataIsland.contentType` becomes
 * the `type` attribute of the generated `<script id="data">` element, and
 * `dataIsland.fileContents` is the HTML the element is inserted into.
 *
 * No "replace the existing island" step is attempted here: readGraph stores
 * `fileContents` with the island it parsed already removed, and the previous
 * attempt passed a *string* that looked like a regex literal to
 * `String.prototype.match`, so it never matched real markup anyway.
 *
 * Insertion point, in order of preference:
 *   1. immediately before the first `</head>`;
 *   2. in a new `<head>` placed before the first `<body>` tag (with or
 *      without attributes);
 *   3. appended at the end of the document, so the patched data is never
 *      silently dropped when neither marker is present.
 *
 * Only the first marker is used, so the island is inserted exactly once, and
 * plain string slicing is used so `$`-sequences in the serialized RDF are
 * copied literally.
 *
 * @param {string} result - serialized RDF to place inside the data island
 * @returns {string} the HTML document including the new data island
 */
function addDataIsland (result) {
  const scriptResult = `<script type="${dataIsland.contentType}" id="data">\n${result}</script>`
  const html = dataIsland.fileContents

  const headEnd = html.indexOf('</head>')
  if (headEnd !== -1) {
    return html.slice(0, headEnd) + scriptResult + '\n' + html.slice(headEnd)
  }

  // `<body>` as well as `<body class="...">`
  const bodyStart = html.search(/<body[\s>]/)
  if (bodyStart !== -1) {
    return html.slice(0, bodyStart) + '<head>\n' + scriptResult + '\n</head>\n' + html.slice(bodyStart)
  }

  // Neither marker found: keep the data rather than returning the HTML unchanged.
  return html + '\n' + scriptResult
}

const contentType = dataIsland.contentType || resource.contentType
if (contentType === 'application/ld+json') {
$rdf.serialize(resourceSym, graph, resource.url, contentType, function (err, result) {
if (err) return reject(error(500, `Failed to serialize after patch: ${err}`))
if (dataIsland && dataIsland.contentType) result = addDataIsland(result)
doWrite(result)
})
} else {
const serialized = $rdf.serialize(resourceSym, graph, resource.url, resource.contentType)
let serialized = $rdf.serialize(resourceSym, graph, resource.url, contentType)
if (dataIsland && dataIsland.contentType) serialized = addDataIsland(serialized)
doWrite(serialized)
}
})
Expand Down
33 changes: 32 additions & 1 deletion lib/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ module.exports.getQuota = getQuota
module.exports.overQuota = overQuota
module.exports.getContentType = getContentType
module.exports.parse = parse
module.exports.HTMLDataIsland = HTMLDataIsland

const fs = require('fs')
const path = require('path')
Expand Down Expand Up @@ -128,6 +129,25 @@ function serialize (graph, baseUri, contentType) {
})
}

/**
 * Finds the first RDF "data island" in an HTML document: a `<script>` element
 * whose opening tag carries `type="<RDF media type>"`.
 *
 * Only the opening tag is inspected, so ordinary scripts that merely mention
 * e.g. `type="text/turtle"` inside their body are not mistaken for an island.
 * Scripts are examined in document order; the first one with a recognised
 * type wins.
 *
 * @param {string} data - HTML source text
 * @returns {[string, string]} `[dataScript, contentType]` where `dataScript`
 *   is the whole element (`<script ...>...</script>`) and `contentType` is one
 *   of `text/turtle`, `text/n3`, `application/ld+json`, `application/rdf+xml`.
 *   Both entries are `''` when no island is found or `data` is not a
 *   non-empty string.
 */
function HTMLDataIsland (data) {
  if (typeof data !== 'string' || data === '') return ['', '']

  const RDFType = ['text/turtle', 'text/n3', 'application/ld+json', 'application/rdf+xml']
  // group 1: attributes of the opening tag, group 2: element content.
  // A fresh regex per call keeps the `g` flag's lastIndex state local.
  const scriptElement = /<script([^>]*)>([\s\S]*?)<\/script/g

  let match
  while ((match = scriptElement.exec(data)) !== null) {
    const attributes = match[1]
    const content = match[2]
    const contentType = RDFType.find(type => attributes.includes(`type="${type}"`))
    if (contentType) {
      return [`<script${attributes}>${content}</script>`, contentType]
    }
  }
  return ['', '']
}

function translate (stream, baseUri, from, to) {
return new Promise((resolve, reject) => {
let data = ''
Expand All @@ -136,6 +156,17 @@ function translate (stream, baseUri, from, to) {
data += chunk
})
.on('end', function () {
// check for HTML data island
let dataScript = ''
if (from === 'text/html') {
[dataScript, from] = HTMLDataIsland(data)
data = dataScript.replace(/^<script(.*?)>/gms, '').replace(/<\/script>$/gms, '')
if (!from) {
reject(new Error(404, 'data island do not exist'))
}
}
if (from === 'text/html') return resolve(data)
// parse 'from', serialize 'to'
const graph = $rdf.graph()
$rdf.parse(data, graph, baseUri, from, function (err) {
if (err) return reject(err)
Expand Down Expand Up @@ -174,7 +205,7 @@ function stringToStream (string) {
function stripLineEndings (obj) {
  // Falsy input (undefined, null, '') is handed back untouched.
  if (!obj) { return obj }

  // Remove every CRLF, LF and CR. Only the `g` flag is needed: `m` and `s`
  // affect `^`, `$` and `.` respectively, none of which appear in the pattern.
  return obj.replace(/\r\n|\n|\r/g, '')
}

/**
Expand Down
20 changes: 20 additions & 0 deletions test/integration/formats-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,26 @@ describe('formats', function () {
.expect(/Hello, world!/)
.expect(200, done)
})
// Content negotiation on HTML resources: when the client accepts only an RDF
// media type, the server is expected to answer from the RDF data island
// embedded in the HTML.
describe('HTML dataIsland', function () {
// hello.html is requested as Turtle; per the test title it has no data
// island, so the request must fail with 404.
it('should return 404 if Accept is set to only text/turtle and no dataIsland', function (done) {
server.get('/hello.html')
.set('accept', 'text/turtle')
.expect(404, done)
})
// The fixture's island is already Turtle, so its content is expected in the
// response body with a text/turtle Content-type.
it('should return text/turtle if dataIsland and Accept is set to only text/turtle', function (done) {
server.get('/hello-with-data-island.html')
.set('accept', 'text/turtle')
.expect('Content-type', /text\/turtle/)
.expect(/<> a "test"./)
.expect(200, done)
})
// Same fixture requested as JSON-LD; only status and Content-type are
// checked, not the body.
it('should return JSON-LD if dataIsland and Accept is set to only application/ld+json', function (done) {
server.get('/hello-with-data-island.html')
.set('accept', 'application/ld+json')
.expect('Content-type', 'application/ld+json; charset=utf-8')
.expect(200, done)
})
})
})

describe('JSON-LD', function () {
Expand Down
57 changes: 57 additions & 0 deletions test/integration/patch-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,63 @@ describe('PATCH', () => {
})
})

// PATCH against HTML resources: the patch is applied to the RDF held in the
// document's data island and the island is written back into the HTML.
// `result` is compared against the stored file, so whitespace inside the
// template literals below is significant.
describe('HTML data island', () => {
// Target HTML has no data island yet: a text/turtle island is expected to be
// created inside a new <head> placed before <body>.
describe('on a non-existent HTML dataIsland', describePatch({
path: '/data-island-new.html',
// exists: false,
patch: `<> a solid:InsertDeletePatch;
solid:inserts { <x> <y> <z>. }.`
}, { // expected:
status: 200,
text: 'Patch applied successfully',
result: `<html>\r
<head>
<script type="text/turtle" id="data">
@prefix : </data-island-new.html#>.
@prefix tim: </>.

tim:x tim:y tim:z.

</script>
</head>
<body>Hello, world!</body>\r
</html>\r
`
}))

// Target HTML already has an island containing <a> <b> <c>: the WHERE clause
// binds ?a, the triple is swapped, and the re-serialized island is expected
// just before </head> (the blank line is where the old island used to be).
describe('with a matching WHERE clause', describePatch({
path: '/data-island.html',
patch: `<> a solid:InsertDeletePatch;
solid:where { ?a <b> <c>. };
solid:inserts { ?a <y> <z>. };
solid:deletes { ?a <b> <c>. }.`
}, { // expected:
status: 200,
text: 'Patch applied successfully',
result: `<html>
<head>

<script type="text/turtle" id="data">
@prefix : </data-island.html#>.\n@prefix tim: </>.\n\ntim:a tim:y tim:z.\n\ntim:d tim:e tim:f.\n
</script>
</head>
<body>Hello, world!</body>
</html>
`
}))

// The island holds no `?a <y> <z>` triple, so the WHERE clause cannot be
// satisfied and the patch must be rejected with 409.
describe('with a non-matching WHERE clause', describePatch({
path: '/data-island.html',
patch: `<> a solid:InsertDeletePatch;
solid:where { ?a <y> <z>. };
solid:inserts { ?a <y> <z>. };
solid:deletes { ?a <b> <c>. }.`
}, { // expected:
status: 409,
text: 'The patch could not be applied'
}))
})

// Creates a PATCH test for the given resource with the given expected outcomes
function describePatch ({ path, exists = true, patch, contentType = 'text/n3' },
{ status = 200, text, result }) {
Expand Down
5 changes: 5 additions & 0 deletions test/resources/hello-with-data-island.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<html>
<script type="text/turtle" id="data"><> a "test".</script>
<body>Hello, world!</body>
<script type="javascript"></script>
</html>
3 changes: 3 additions & 0 deletions test/resources/patch/data-island-new.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<html>
<body>Hello, world!</body>
</html>
9 changes: 9 additions & 0 deletions test/resources/patch/data-island.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<html>
<head>
<script type="text/turtle" id="data">
<a> <b> <c>.
<d> <e> <f>.
</script>
</head>
<body>Hello, world!</body>
</html>