-
Notifications
You must be signed in to change notification settings - Fork 2
/
DatasetCore.js
450 lines (362 loc) · 13.7 KB
/
DatasetCore.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
function isString (s) {
return typeof s === 'string' || s instanceof String
}
const xsdString = 'http://www.w3.org/2001/XMLSchema#string'
function termToId (term) {
if (typeof term === 'string') {
return term
}
if (!term) {
return ''
}
if (typeof term.id !== 'undefined' && term.termType !== 'Quad') {
return term.id
}
let subject, predicate, object, graph
// Term instantiated with another library
switch (term.termType) {
case 'NamedNode':
return term.value
case 'BlankNode':
return `_:${term.value}`
case 'Variable':
return `?${term.value}`
case 'DefaultGraph':
return ''
case 'Literal':
if (term.language) {
return `"${term.value}"@${term.language}`
}
return `"${term.value}"${term.datatype && term.datatype.value !== xsdString ? `^^${term.datatype.value}` : ''}`
case 'Quad':
// To identify RDF* quad components, we escape quotes by doubling them.
// This avoids the overhead of backslash parsing of Turtle-like syntaxes.
subject = escapeQuotes(termToId(term.subject))
predicate = escapeQuotes(termToId(term.predicate))
object = escapeQuotes(termToId(term.object))
graph = term.graph.termType === 'DefaultGraph' ? '' : ` ${termToId(term.graph)}`
return `<<${subject} ${predicate} ${object}${graph}>>`
default:
throw new Error(`Unexpected termType: ${term.termType}`)
}
}
const escapedLiteral = /^"(.*".*)(?="[^"]*$)/
function escapeQuotes (id) {
return id.replace(escapedLiteral, (_, quoted) => `"${quoted.replace(/"/g, '""')}`)
}
class DatasetCore {
constructor (quads) {
// The number of quads is initially zero
this._size = 0
// `_graphs` contains subject, predicate, and object indexes per graph
this._graphs = Object.create(null)
// `_ids` maps entities such as `http://xmlns.com/foaf/0.1/name` to numbers,
// saving memory by using only numbers as keys in `_graphs`
this._id = 0
this._ids = Object.create(null)
this._ids['><'] = 0 // dummy entry, so the first actual key is non-zero
this._entities = Object.create(null) // inverse of `_ids`
this._quads = new Map()
// Add quads if passed
if (quads) {
for (const quad of quads) {
this.add(quad)
}
}
}
get size () {
// Return the quad count if if was cached
let size = this._size
if (size !== null) {
return size
}
// Calculate the number of quads by counting to the deepest level
size = 0
const graphs = this._graphs
let subjects, subject
for (const graphKey in graphs) {
for (const subjectKey in (subjects = graphs[graphKey].subjects)) {
for (const predicateKey in (subject = subjects[subjectKey])) {
size += Object.keys(subject[predicateKey]).length
}
}
}
this._size = size
return this._size
}
add (quad) {
// Convert terms to internal string representation
let subject = termToId(quad.subject)
let predicate = termToId(quad.predicate)
let object = termToId(quad.object)
const graph = termToId(quad.graph)
// Find the graph that will contain the triple
let graphItem = this._graphs[graph]
// Create the graph if it doesn't exist yet
if (!graphItem) {
graphItem = this._graphs[graph] = { subjects: {}, predicates: {}, objects: {} }
// Freezing a graph helps subsequent `add` performance,
// and properties will never be modified anyway
Object.freeze(graphItem)
}
// Since entities can often be long IRIs, we avoid storing them in every index.
// Instead, we have a separate index that maps entities to numbers,
// which are then used as keys in the other indexes.
const ids = this._ids
const entities = this._entities
subject = ids[subject] || (ids[entities[++this._id] = subject] = this._id)
predicate = ids[predicate] || (ids[entities[++this._id] = predicate] = this._id)
object = ids[object] || (ids[entities[++this._id] = object] = this._id)
this._addToIndex(graphItem.subjects, subject, predicate, object)
this._addToIndex(graphItem.predicates, predicate, object, subject)
this._addToIndex(graphItem.objects, object, subject, predicate)
this._setQuad(subject, predicate, object, graph, quad)
// The cached quad count is now invalid
this._size = null
return this
}
delete (quad) {
// Convert terms to internal string representation
let subject = termToId(quad.subject)
let predicate = termToId(quad.predicate)
let object = termToId(quad.object)
const graph = termToId(quad.graph)
// Find internal identifiers for all components
// and verify the quad exists.
const ids = this._ids
const graphs = this._graphs
let graphItem, subjects, predicates
if (!(subject = ids[subject]) || !(predicate = ids[predicate]) ||
!(object = ids[object]) || !(graphItem = graphs[graph]) ||
!(subjects = graphItem.subjects[subject]) ||
!(predicates = subjects[predicate]) ||
!(object in predicates)
) {
return this
}
// Remove it from all indexes
this._removeFromIndex(graphItem.subjects, subject, predicate, object)
this._removeFromIndex(graphItem.predicates, predicate, object, subject)
this._removeFromIndex(graphItem.objects, object, subject, predicate)
if (this._size !== null) {
this._size--
}
this._deleteQuad(subject, predicate, object, graph)
// Remove the graph if it is empty
for (subject in graphItem.subjects) { // eslint-disable-line no-unreachable-loop
return this
}
delete graphs[graph]
return this
}
has (quad) {
// Convert terms to internal string representation
const subject = termToId(quad.subject)
const predicate = termToId(quad.predicate)
const object = termToId(quad.object)
const graph = termToId(quad.graph)
const graphItem = this._graphs[graph]
if (!graphItem) {
return false
}
const ids = this._ids
let subjectId, predicateId, objectId
// Translate IRIs to internal index keys.
if (
(isString(subject) && !(subjectId = ids[subject])) ||
(isString(predicate) && !(predicateId = ids[predicate])) ||
(isString(object) && !(objectId = ids[object]))
) {
return false
}
return this._countInIndex(graphItem.objects, objectId, subjectId, predicateId) === 1
}
match (subject, predicate, object, graph) {
return this._createDataset(this._match(subject, predicate, object, graph))
}
[Symbol.iterator] () {
return this._match()[Symbol.iterator]()
}
// ## Private methods
// ### `_addToIndex` adds a quad to a three-layered index.
// Returns if the index has changed, if the entry did not already exist.
_addToIndex (index0, key0, key1, key2) {
// Create layers as necessary
const index1 = index0[key0] || (index0[key0] = {})
const index2 = index1[key1] || (index1[key1] = {})
// Setting the key to _any_ value signals the presence of the quad
const existed = key2 in index2
if (!existed) {
index2[key2] = null
}
return !existed
}
// ### `_removeFromIndex` removes a quad from a three-layered index
_removeFromIndex (index0, key0, key1, key2) {
// Remove the quad from the index
const index1 = index0[key0]
const index2 = index1[key1]
delete index2[key2]
// Remove intermediary index layers if they are empty
for (const key in index2) { // eslint-disable-line no-unreachable-loop
return
}
delete index1[key1]
for (const key in index1) { // eslint-disable-line no-unreachable-loop
return
}
delete index0[key0]
}
// ### `_findInIndex` finds a set of quads in a three-layered index.
// The index base is `index0` and the keys at each level are `key0`, `key1`, and `key2`.
// Any of these keys can be undefined, which is interpreted as a wildcard.
// `name0`, `name1`, and `name2` are the names of the keys at each level,
// used when reconstructing the resulting quad
// (for instance: _subject_, _predicate_, and _object_).
// Finally, `graph` will be the graph of the created quads.
// If `callback` is given, each result is passed through it
// and iteration halts when it returns truthy for any quad.
// If instead `array` is given, each result is added to the array.
_findInIndex (index0, key0, key1, key2, name0, name1, name2, graph, callback, array) {
let tmp, index1, index2
// If a key is specified, use only that part of index 0.
if (key0) {
(tmp = index0, index0 = {})[key0] = tmp[key0]
}
for (const value0 in index0) {
index1 = index0[value0]
if (index1) {
// If a key is specified, use only that part of index 1.
if (key1) {
(tmp = index1, index1 = {})[key1] = tmp[key1]
}
for (const value1 in index1) {
index2 = index1[value1]
if (index2) {
// If a key is specified, use only that part of index 2, if it exists.
const values = key2 ? (key2 in index2 ? [key2] : []) : Object.keys(index2)
// Create quads for all items found in index 2.
for (let l = 0; l < values.length; l++) {
const parts = {
[name0]: value0,
[name1]: value1,
[name2]: values[l]
}
const quad = this._getQuad(parts.subject, parts.predicate, parts.object, graph)
if (array) {
array.push(quad)
} else if (callback(quad)) {
return true
}
}
}
}
}
}
return array
}
// ### `_countInIndex` counts matching quads in a three-layered index.
// The index base is `index0` and the keys at each level are `key0`, `key1`, and `key2`.
// Any of these keys can be undefined, which is interpreted as a wildcard.
_countInIndex (index0, key0, key1, key2) {
let count = 0
let tmp, index1, index2
// If a key is specified, count only that part of index 0
if (key0) {
(tmp = index0, index0 = {})[key0] = tmp[key0]
}
for (const value0 in index0) {
index1 = index0[value0]
if (index1) {
// If a key is specified, count only that part of index 1
if (key1) {
(tmp = index1, index1 = {})[key1] = tmp[key1]
}
for (const value1 in index1) {
index2 = index1[value1]
if (index2) {
if (key2) {
// If a key is specified, count the quad if it exists
(key2 in index2) && count++
} else {
// Otherwise, count all quads
count += Object.keys(index2).length
}
}
}
}
}
return count
}
// ### `_getGraphs` returns an array with the given graph,
// or all graphs if the argument is null or undefined.
_getGraphs (graph) {
if (!isString(graph)) {
return this._graphs
}
return {
[graph]: this._graphs[graph]
}
}
_match (subject, predicate, object, graph) {
// Convert terms to internal string representation
subject = subject && termToId(subject)
predicate = predicate && termToId(predicate)
object = object && termToId(object)
graph = graph && termToId(graph)
const quads = []
const graphs = this._getGraphs(graph)
const ids = this._ids
let content, subjectId, predicateId, objectId
// Translate IRIs to internal index keys.
if (
(isString(subject) && !(subjectId = ids[subject])) ||
(isString(predicate) && !(predicateId = ids[predicate])) ||
(isString(object) && !(objectId = ids[object]))
) {
return quads
}
for (const graphId in graphs) {
content = graphs[graphId]
// Only if the specified graph contains triples, there can be results
if (content) {
// Choose the optimal index, based on what fields are present
if (subjectId) {
if (objectId) {
// If subject and object are given, the object index will be the fastest
this._findInIndex(content.objects, objectId, subjectId, predicateId, 'object', 'subject', 'predicate', graphId, null, quads)
} else {
// If only subject and possibly predicate are given, the subject index will be the fastest
this._findInIndex(content.subjects, subjectId, predicateId, null, 'subject', 'predicate', 'object', graphId, null, quads)
}
} else if (predicateId) {
// if only predicate and possibly object are given, the predicate index will be the fastest
this._findInIndex(content.predicates, predicateId, objectId, null, 'predicate', 'object', 'subject', graphId, null, quads)
} else if (objectId) {
// If only object is given, the object index will be the fastest
this._findInIndex(content.objects, objectId, null, null, 'object', 'subject', 'predicate', graphId, null, quads)
} else {
// If nothing is given, iterate subjects and predicates first
this._findInIndex(content.subjects, null, null, null, 'subject', 'predicate', 'object', graphId, null, quads)
}
}
}
return quads
}
_getQuad (subjectId, predicateId, objectId, graphId) {
return this._quads.get(this._toId(subjectId, predicateId, objectId, graphId))
}
_setQuad (subjectId, predicateId, objectId, graphId, quad) {
this._quads.set(this._toId(subjectId, predicateId, objectId, graphId), quad)
}
_deleteQuad (subjectId, predicateId, objectId, graphId) {
this._quads.delete(this._toId(subjectId, predicateId, objectId, graphId))
}
_createDataset (quads) {
return new this.constructor(quads)
}
_toId (subjectId, predicateId, objectId, graphId) {
return `${subjectId}:${predicateId}:${objectId}:${graphId}`
}
}
export default DatasetCore