-
Notifications
You must be signed in to change notification settings - Fork 0
/
graphops.rb
2600 lines (2217 loc) · 89.5 KB
/
graphops.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
require 'intertwingler/version'
require 'mimemagic'
require 'set'
require 'rdf'
require 'rdf/vocab'
require 'rdf/reasoner'
require 'sparql'
require 'intertwingler/resolver'
require 'intertwingler/util'
# load up my vocabs before reasoner is applied
require 'intertwingler/vocab'
module Intertwingler
# gotta make sure this gets run
RDF::Reasoner.apply(:rdfs, :owl)
# This is to attach inferencing operations directly to the
# repository instead of bolting it on every time like a schmuck
# this module bolts functionality onto RDF::Repository
module GraphOps
include Intertwingler::Util::Clean
private
# we type this out a lot so let's not: short alias for the SPARQL
# algebra operator namespace (used below, e.g. `SAO::Reverse`)
SAO = SPARQL::Algebra::Operator
# short alias for the CI vocabulary from Intertwingler::Vocab
CI = Intertwingler::Vocab::CI
# rdf term type tests: canonical node spec symbol => the predicate
# method that is sent to a term to test for that kind of node
NTESTS = { uri: :"uri?", blank: :"node?", literal: :"literal?" }.freeze
# node spec synonyms: every accepted symbol => its canonical symbol.
# the canonical symbols (the keys of NTESTS) map to themselves. frozen
# so the shared lookup table can't be modified by accident.
NMAP = { iri: :uri, bnode: :blank }.merge(
  NTESTS.keys.zip(NTESTS.keys).to_h).freeze
# if the instance data doesn't have an exact property mentioned in
# the spec, it may have an equivalent property or subproperty we
# may be able to use. we could imagine a scoring system analogous
# to the one used by CSS selectors, albeit using the topological
# distance of classes/predicates in the spec versus those in the
# instance data.
# think about dcterms:title is a subproperty of dc11:title even
# though they are actually more like equivalent properties;
# owl:equivalentProperty is not as big a conundrum as
# rdfs:subPropertyOf.
# if Q rdfs:subPropertyOf P then S Q O implies S P O. this is
# great but property Q may not be desirable to display.
# it may be desirable to be able to express properties to never
# use as a label, such as skos:hiddenLabel
# consider ranked alternates, sequences, sequences of alternates.
# (this is what fresnel does fyi)
# Default label specification, keyed by RDF class. Each class maps
# `:label` and `:desc` to a list of one or two predicate lists: the
# first is "main", the optional second is "alt". Predicates are listed
# in order of preference. This raw table is normalized by
# #process_labels before use.
LABELS = {
RDF::RDFS.Resource => {
label: [
# main
[RDF::Vocab::SKOS.prefLabel, RDF::RDFS.label,
RDF::Vocab::DC.title, RDF::Vocab::DC11.title, RDF::RDFV.value],
# alt
[RDF::Vocab::SKOS.altLabel, RDF::Vocab::DC.alternative,
RDF::Vocab::SKOS.hiddenLabel],
],
desc: [
# main will be cloned into alt
[RDF::Vocab::DC.abstract, RDF::Vocab::DC.description,
RDF::Vocab::DC11.description, RDF::RDFS.comment,
RDF::Vocab::SKOS.note],
],
},
RDF::Vocab::FOAF.Document => {
label: [
# main
[RDF::Vocab::DC.title, RDF::Vocab::DC11.title],
# alt
[RDF::Vocab::BIBO.shortTitle, RDF::Vocab::DC.alternative],
],
desc: [
# main
[RDF::Vocab::BIBO.abstract, RDF::Vocab::DC.abstract,
RDF::Vocab::DC.description, RDF::Vocab::DC11.description],
# alt
[RDF::Vocab::BIBO.shortDescription],
],
},
RDF::Vocab::FOAF.Agent => {
label: [
# main (will get cloned into alt)
[RDF::Vocab::SKOS.prefLabel, RDF::Vocab::FOAF.name],
# alt (this was an ugly decision but this will go away soon)
[RDF::Vocab::SKOS.altLabel, RDF::Vocab::FOAF.nick],
],
desc: [
# main cloned into alt
[RDF::Vocab::FOAF.status],
],
},
}
# owl:Thing reuses the very same spec object as rdfs:Resource (this is
# an alias, not a copy)
LABELS[RDF::OWL.Thing] = LABELS[RDF::RDFS.Resource]
# Coerce a node spec into a canonical form. Node specs in
# #subjects_for and #objects_for methods are arrays of symbols `:uri`,
# `:blank`, `:literal`, and then the synonyms `:resource` (shorthand
# for `[:uri, :blank]`), `:iri` for `:uri`, and `:bnode` (for
# `:blank`).
#
# @param spec [Symbol, Array<Symbol>] the node spec
# @param rev [false, true] whether the node spec is to be applied to
# a subject node rather than an object node
#
def coerce_node_spec spec, rev: false
  # accept nil, a lone symbol, or anything array-able
  wanted =
    if spec.nil? then []
    elsif spec.respond_to?(:to_a) then spec.to_a
    else [spec]
    end

  # :resource is shorthand for both kinds of non-literal node
  wanted = wanted - [:resource] + [:uri, :blank] if wanted.include? :resource

  raise 'Subjects are never literals' if rev and wanted.include? :literal

  # fold the synonyms into canonical names; anything unrecognized is
  # silently dropped
  canonical = NMAP.values_at(*wanted).compact.uniq
  return canonical unless canonical.empty?

  # nothing usable was given, so default to every kind of node that
  # can occur in this position (subjects are never literals)
  rev ? NTESTS.keys - [:literal] : NTESTS.keys
end
# Determine whether a given node matches a node spec.
def node_matches? node, spec
  # each spec symbol names (via NTESTS) a predicate method to try on
  # the node; the first one that answers truthy settles it
  spec.each { |kind| return true if node.send(NTESTS[kind]) }
  false
end
# this ensures languages are something we can use
def coerce_languages languages
  # nil.to_a is [], so "no languages" comes out as an empty list
  list = languages.respond_to?(:to_a) ? languages.to_a : [languages]
  # normalize each tag (trim, underscores to hyphens, lowercase) and
  # keep only the first occurrence of each
  list.each_with_object([]) do |lang, tags|
    tag = lang.to_s.strip.tr_s(?_, ?-).downcase
    tags << tag unless tags.include? tag
  end
end
# this tells us if the literal's language is in our given set
def is_language? literal, languages
  # only literals can carry a language tag
  return false unless literal.literal?
  tag = literal.language
  return false unless tag
  # compare like with like: both sides get the same normalization
  wanted = coerce_languages languages
  wanted.include? tag.to_s.strip.tr_s(?_, ?-).downcase
end
# this gives us a set of inverse (and symmetric) properties for
# the given input
def invert_semantic properties, entail: false
  # Returns a Set holding the inverses of the given properties, plus
  # any of the given properties that are themselves symmetric. When
  # `entail` is true and the set is non-empty, it is expanded through
  # #property_set before being returned.
  properties = assert_resources properties, empty: false

  # the fold is seeded with an empty set for two reasons: an empty
  # property list yields an empty set rather than nil, and the sets
  # stored in the cache are never handed to callers directly
  inverted = properties.reduce(Set[]) do |acc, p|
    acc | (icache[p] ||= begin
      # inverse properties are available by entailment
      set = p.respond_to?(:inverseOf) ? p.inverseOf.to_set : Set[]
      # symmetric properties go in as-is
      set << p if symmetric? p
      set
    end)
  end

  # don't forget to entail
  entail && !inverted.empty? ? property_set(inverted) : inverted
end
# this gives us a label spec we can use
def process_labels struct
  # Normalizes a label spec: a hash of RDF class => { label:/desc: =>
  # [main, alt] }. Every predicate list is expanded with its
  # equivalent properties, a missing `alt` list is filled in from
  # `main`, and the finished entry is copied to every equivalent
  # class that has no entry yet. Raises ArgumentError on malformed
  # input. Returns a (shallowly) frozen hash.
  out = {}

  struct.each do |type, spec|
    unless type.is_a? RDF::URI
      raise ArgumentError, "keys need to be RDF::URI, not #{type.class}"
    end
    unless spec.is_a? Hash
      raise ArgumentError, "spec needs to be a Hash, not #{spec.inspect}"
    end

    ospec = out[type] ||= {}

    spec.each do |variant, pair|
      unless %i[label desc].include? variant
        raise ArgumentError,
          "variant needs to be :label or :desc, not #{variant}"
      end
      unless pair.is_a? Array and !pair.empty? and
          pair.all? { |x| x.is_a? Array }
        raise ArgumentError,
          "struct[#{type}][#{variant}] must be an array of arrays"
      end

      # truncate list to two elements (main, alt)
      pair = pair[0, 2]

      vspec = ospec[variant] ||= [[], []]

      pair.each_index do |i|
        preds = pair[i].dup
        raise ArgumentError,
          "specify at least one property" if preds.empty?
        raise ArgumentError,
          "property list needs to be RDF::URIs" unless
          preds.all? { |p| p.is_a? RDF::URI }

        j = 0
        loop do
          # obtain equivalent properties we don't already have
          equiv = preds[j].entail(:equivalentProperty) - preds
          # splice them in directly after the property they came from
          preds.insert(j + 1, *equiv) unless equiv.empty?
          # skip over what we just added
          j += equiv.length + 1
          # stop when we have run off the end
          break if j >= preds.length
        end

        # prepend rather than assign: this slot may already have
        # content inherited from an equivalent class (see below)
        vspec[i].unshift(*preds)
        vspec[i].uniq!
      end

      # copy main to alt if alt is missing
      if pair.length < 2
        vspec[1].unshift(*vspec[0])
        vspec[1].uniq!
      end
    end

    # equivalent classes get a copy of the *normalized* entry, not the
    # raw input: that way they also receive the expanded properties
    # and the main-to-alt fallback, and later prepends for one class
    # cannot leak into another class or into the caller's input
    type.entail(:equivalentClass).each do |equiv|
      out[equiv] ||= ospec.transform_values { |lists| lists.map(&:dup) }
    end
  end

  out.freeze
end
public
# Returns the input term coerced to an RDF::Vocabulary::Term.
#
# @param term [URI, RDF::URI, RDF::Vocabulary::Term, #to_s] the term
# @param strict [false, true] whether to strictly enforce the conversion
#
# @return [RDF::Vocabulary::Term, RDF::URI, nil]
#
def coerce_term term, uri: false, strict: false
  # the cache is keyed on the string form of the coerced resource
  key = coerce_resource(term).to_s
  out = tcache[key] ||= coerce_resource(term, as: :term)

  # `uri:` rejects anything that is not a URI
  return nil if uri && !out.uri?

  # `strict:` rejects anything that is neither a vocabulary term nor
  # a vocabulary
  if strict
    vocabish = out.is_a?(RDF::Vocabulary::Term) || out.is_a?(RDF::Vocabulary)
    return nil unless vocabish
  end

  out
end
# Retrieve the current structure being used to govern how labels
# are resolved against subjects.
#
# @return [Hash{RDF::URI=>Hash{Symbol=>Array<Array<RDF::URI>>}}]
# the label structure
#
def label_spec
  # normalize the built-in LABELS lazily, the first time anyone asks
  @labels = process_labels(LABELS) unless @labels
  @labels
end
# Set a new structure for determining how labels are resolved.
# This is a hash where the keys are RDF types, and the values are
# hashes containing the keys `:label` and `:desc`, whose values
# are arrays containing one or two values (for main and alternate;
# if alternate is missing then main will be used instead), which
# themselves are arrays containing one or more RDF properties, in
# order of preference for the given class.
#
# @param spec [Hash{RDF::URI=>Hash{Symbol=>Array<Array<RDF::URI>>}}]
# what a mouthful
#
# @return [Hash{RDF::URI=>Hash{Symbol=>Array<Array<RDF::URI>>}}]
# the same label structure, but normalized
#
def label_spec= spec
  # validate and normalize before replacing the current spec
  normalized = process_labels spec
  @labels = normalized
end
# Get the objects for a given subject-predicate pair. Either of
# `predicate` or `graph` can be array-able, in which case the
# Cartesian product will be evaluated. If `entail` is
# true (the default), the predicate(s) will be expanded out into
# the full set of properties via `rdfs:subPropertyOf` and
# `owl:equivalentProperty` relations, as well as `owl:inverseOf`
# and `owl:SymmetricProperty` entailments. Passing in a `graph`
# will constrain the search to one or more named graphs, otherwise
# all graphs are queried.
#
# @param subject [RDF::Resource] the subject
# @param predicate [RDF::URI, Array<RDF::URI>] the predicate(s)
# @param graph [RDF::Resource, Array<RDF::Resource>] the graph(s)
# @param entail [true, false] whether to entail
# @param only [Symbol, Array<Symbol>] limit to certain node specs
# @param language [String, Symbol, Array<String, Symbol>]
# constrain literals to these languages
# @param datatype [RDF::URI, Array<RDF::URI>] constrain literals
# to these datatypes
# @param swap [false, true] noop for argument parity with #subjects_for
#
# @yieldparam object [RDF::Resource, RDF::Literal] the object
# @yieldparam rel [Set<RDF::URI>] predicates pointing toward this
# object from the subject
# @yieldparam rev [Set<RDF::URI>] predicates pointing *out* of
# this object to the subject
#
# @return [Array] the resulting objects, or otherwise the
# aggregate results of the block.
#
def objects_for subject, predicate, graph: nil, entail: true,
    only: [], language: nil, datatype: nil, swap: false, &block
  # XXX you know what might be smart? fine-tuning the entailment
  # so it also does owl:sameAs on nodes (even the graph),

  # normalize all the inputs (`swap` is accepted but not used here)
  only      = coerce_node_spec only
  subject   = assert_resource subject
  predicate = assert_resources predicate, blank: false, empty: true
  datatype  = assert_resources datatype, blank: false
  graph     = assert_resources graph
  language  = language.respond_to?(:to_a) ? language.to_a : [language]

  # entail all the predicates
  predicate = property_set predicate if entail

  # the inverse predicates are computed once up front rather than per
  # graph; they are pointless if only literals are wanted, since a
  # literal can never be the subject of the inverse statement
  literals_only = only == [:literal]
  revp = invert_semantic predicate, entail: entail unless literals_only

  # a lone nil graph gives triple semantics; a lone nil predicate
  # acts as a wildcard
  graph << nil if graph.empty?
  predicate << nil if predicate.empty?

  # node => [predicates pointing at it, predicates pointing out of it]
  found = {}

  graph.each do |g|
    # forward direction: subject -p-> node
    predicate.each do |p|
      query([subject, p, nil, g]).objects.each do |o|
        # ignore statement objects that don't match the spec
        next unless node_matches?(o, only)

        if o.literal?
          # literals are additionally filtered by language and datatype
          next unless language.empty? or is_language? o, language
          next unless datatype.empty? or datatype.include? o.datatype
        end

        (found[o] ||= [Set[], Set[]]).first << p
      end
    end

    next if literals_only

    # reverse direction: node -p-> subject, over the inverse predicates
    revp.each do |p|
      query([nil, p, subject, g]).subjects.each do |s|
        # a subject is never a literal, so there is no language or
        # datatype to check here
        next unless node_matches? s, only

        (found[s] ||= [Set[], Set[]]).last << p
      end
    end
  end

  # *here* is where the block gets called, on node, preds out, preds in
  return found.map { |node, preds| block.call node, *preds } if block

  # otherwise we just return the accumulated objects
  found.keys
end
# Get the subjects for a given predicate-object pair. Behaves just
# like #objects_for in many respects, without the provisions for
# literals (as subjects never are). The named parameter, `:swap`,
# will reorder the positional parameters from the order they
# appear in RDF statements (`predicate`, `object`) to always
# beginning with the node on the end of the statement (`object`,
# `predicate`). This helps mitigate certain acrobatics needed in
# contexts when both #subjects_for and #objects_for are called
# together. Note as well that `:only` does not accept `:literal`
# as a node spec, since subjects can never be literals.
#
# @param predicate [RDF::URI] the statement predicate(s) (or the
# object if `:swap` is true)
# @param object [RDF::Resource, RDF::Literal] the object (or the
# predicates if `:swap` is true)
# @param graph [RDF::Resource, Array<RDF::Resource>] the graph(s)
# @param entail [true, false] whether to entail
# @param only [Symbol, Array<Symbol>] limit to certain node specs
# @param swap [false, true] swap positional parameters as described
#
# @yieldparam subject [RDF::Resource] the subject
# @yieldparam rel [Set<RDF::URI>] predicates pointing from this
# subject to the object
# @yieldparam rev [Set<RDF::URI>] predicates pointing from the
# object to this subject
#
# @return [Array] the resulting subjects, or otherwise the
# aggregate results of the block.
#
def subjects_for predicate, object, graph: nil,
    entail: true, only: [], swap: false, &block
  # with `swap`, the caller passed (object, predicate) instead
  predicate, object = object, predicate if swap

  # normalize the inputs; subjects can never be literals, hence `rev`
  only      = coerce_node_spec only, rev: true
  predicate = assert_resources predicate, blank: false, empty: true
  object    = assert_term object
  graph     = assert_resources graph

  predicate = property_set predicate if entail

  # unlike #objects_for, the test for whether to bother with inverse
  # predicates is whether the object is itself a literal: a literal
  # cannot be the subject of the inverse statement
  resource_object = !object.literal?
  revp = invert_semantic predicate, entail: entail if resource_object

  # a lone nil graph gives triple semantics; a lone nil predicate
  # acts as a wildcard
  graph << nil if graph.empty?
  predicate << nil if predicate.empty?

  # node => [predicates pointing from it to the object,
  #          predicates pointing from the object to it]
  found = {}

  graph.each do |g|
    # forward direction: node -p-> object
    predicate.each do |p|
      query([nil, p, object, g]).subjects.each do |s|
        next unless node_matches? s, only
        (found[s] ||= [Set[], Set[]]).first << p
      end
    end

    next unless resource_object

    # reverse direction: object -p-> node, over the inverse predicates
    revp.each do |p|
      query([object, p, nil, g]).objects.each do |o|
        next unless node_matches? o, only
        (found[o] ||= [Set[], Set[]]).last << p
      end
    end
  end

  # process the block if we have one
  return found.map { |node, preds| block.call node, *preds } if block

  # otherwise just give back the subjects
  found.keys
end
# Obtain a key-value structure for the given subject, optionally
# constraining the result by node type (:resource, :uri/:iri,
# :blank/:bnode, :literal)
#
# @param subject [RDF::Resource] the subject of the inquiry
# @param graph [RDF::Resource, Array<RDF::Resource>] named graph(s)
# @param rev [false, true] generate a struct from inbound links
# @param only [Symbol, Array<Symbol>] one or more node types
# @param inverses [false, true] whether to include
# inverse/symmetric properties
#
# @yieldparam node [RDF::Resource, RDF::Literal] a node to be manipulated
# @yieldreturn [RDF::Resource, RDF::Literal] the transformed node
#
# @return [Hash{RDF::URI=>Array<RDF::Resource,RDF::Literal>] the struct
#
def struct_for subject, graph: nil, only: nil,
    rev: false, inverses: false, &block
  # Builds a predicate => [nodes] hash for `subject`, from outbound
  # statements (or inbound ones when `rev` is true). With `inverses`,
  # statements in the opposite direction are folded in under the
  # inverse/symmetric predicates from #invert_semantic. Every node is
  # passed through the block, if one is given, before being stored.
  # Values are sorted with #cmp_term and de-duplicated.
  only    = coerce_node_spec only
  subject = assert_resource subject
  graph   = assert_resources graph
  graph << nil if graph.empty?

  # both patterns are fixed for the whole call. they are kept in
  # separate variables so that the inverse lookup for one graph cannot
  # change the primary lookup for the next graph
  outbound = [subject, nil, nil]
  inbound  = [nil, nil, subject]
  primary, secondary = rev ? [inbound, outbound] : [outbound, inbound]

  rsrc = {}

  graph.each do |g|
    query(primary, graph_name: g) do |stmt|
      node = rev ? stmt.subject : stmt.object
      next unless node_matches? node, only

      node = block.call node if block
      p = coerce_term stmt.predicate
      (rsrc[p] ||= []) << node
    end

    # the opposite direction only ever yields resources, so skip it
    # when only literals were asked for
    next unless inverses and only != [:literal]

    query(secondary, graph_name: g) do |stmt|
      node = rev ? stmt.object : stmt.subject
      next unless node_matches? node, only

      node = block.call node if block
      invert_semantic(stmt.predicate).each do |inverse|
        (rsrc[inverse] ||= []) << node
      end
    end
  end

  # make sure these are clean before shipping em out
  cmp = cmp_term
  rsrc.values.each { |v| v.sort!(&cmp).uniq! }

  rsrc
end
# Obtain all and only the `rdf:type`s directly asserted on the subject.
#
# @param subject [RDF::Resource]
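# @param graph [RDF::Resource, Array<RDF::Resource>] the graph(s) to
# search, passed through to #objects_for
# @param entail [false, true] whether to entail `rdf:type` itself
# when querying the graph (passed through to #objects_for)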
# @param struct [Hash] pull from an attribute-value hash rather than
# the graph
#
# @return [Array<RDF::URI>] the types asserted on the subject
#
def types_for subject, graph: nil, entail: false, struct: nil
  # without a struct, ask the graph for the URI-valued rdf:type objects
  unless struct
    return objects_for(subject, RDF.type,
                       graph: graph, entail: entail, only: :uri)
  end

  # with a struct, read the types straight out of it, keeping only URIs
  assert_struct struct
  (struct[RDF.type] || []).select { |t| t.uri? }
end
alias_method :asserted_types, :types_for
# Obtain the most appropriate label(s) for the subject's type(s).
# Returns one or more (depending on the `unique` flag)
# predicate-object pairs in order of preference.
#
# @param subject [RDF::Resource, RDF::Literal] the subject (or the
# label itself)
# @param unique [true, false] only return the first pair
# @param type [RDF::Term, Array] supply asserted types if already
# retrieved
# @param lang [nil, String, Symbol, Array<String, Symbol>] not
# currently implemented (will be conneg)
# @param desc [false, true] retrieve description instead of label
# @param alt [false, true] retrieve alternate instead of main
# @param noop [false, true] return a pair `[nil, subject]` if nothing found
# @param struct [Hash] the predicate-object struct to search in lieu of
# consulting the graph
#
# @return [Array<(RDF::URI, RDF::Literal)>, Array<Array<(RDF::URI,
# RDF::Literal)>>] either a predicate-object pair or an array of
# pairs.
#
def label_for subject, graph: nil, entail: true, unique: true,
    lang: nil, desc: false, alt: false, noop: false, struct: nil
  # a literal is its own label; shape the result like any other
  if subject.is_a? RDF::Literal
    own = [nil, subject]
    return unique ? own : [own]
  end

  subject = assert_resource subject
  graph   = assert_resources graph

  # get the asserted types
  asserted = types_for subject, graph: graph, struct: struct

  # get the full type stratum if we're entailing, otherwise fake up a
  # single layer; either way rdfs:Resource is the last resort
  strata = if entail then type_strata(asserted) else [asserted] end
  unless strata.flatten.include? RDF::RDFS.Resource
    strata << [RDF::RDFS.Resource]
  end

  struct ||= struct_for subject, graph: graph, only: :literal

  # which part of the label spec we are reading
  variant = desc ? :desc : :label
  slot    = alt ? 1 : 0

  seen  = Set[]
  pairs = []

  strata.each do |types|
    types.each do |type|
      preds = (label_spec.dig(type, variant) || [])[slot]
      next unless preds

      preds.each do |p|
        vals = struct[p]
        next unless vals

        vals.each do |v|
          next unless v.literal?
          pair = [p, v]
          # Set#add? returns nil for a pair we have already collected
          pairs << pair if seen.add? pair
        end
        # XXX TODO sort vals
      end
    end
  end

  # with `noop`, fall back to the subject itself when nothing matched
  pairs << [nil, subject] if noop and pairs.empty?

  unique ? pairs.first : pairs.uniq
end
private
# Predicates relating a resource to its author(s), in listed order.
AUTHOR = [Intertwingler::Vocab::PAV.authoredBy, RDF::Vocab::DC.creator,
RDF::Vocab::DC11.creator, RDF::Vocab::PROV.wasAttributedTo]
# Predicates relating a resource to its contributor(s).
CONTRIB = [Intertwingler::Vocab::PAV.contributedBy, RDF::Vocab::DC.contributor,
RDF::Vocab::DC11.contributor]
# Predicates pointing at an explicitly ordered list of authors.
AUTHOR_LIST = [RDF::Vocab::BIBO.authorList]
# Predicates pointing at an explicitly ordered list of contributors.
CONTRIB_LIST = [RDF::Vocab::BIBO.contributorList]
# Expand each list in place with equivalent properties, then freeze it.
# This runs once, when the module body is evaluated.
[AUTHOR, CONTRIB, AUTHOR_LIST, CONTRIB_LIST].each do |preds|
i = 0
loop do
# note we are not using property_set or objects_for because we
# *only* want equivalentProperty entailment, not subproperties
equiv = preds[i].entail(:equivalentProperty) - preds
# splice the new equivalents in right after their source property
preds.insert(i + 1, *equiv) unless equiv.empty?
# step past the source property and whatever was just inserted
i += equiv.length + 1
break if i >= preds.length
end
preds.freeze
end
public
# Return an ordered list of authors (or contributors) for a given
# subject. Tries `bibo:authorList` (or `bibo:contributorList`)
# first before going on to `dct:creator` etc. Any unsorted authors
# not listed in an explicit order are sorted by name (label).
#
# @param subject [RDF::Resource] the entity whose authors we are
# looking for
# @param graph [nil, RDF::Resource] a named graph identifier
# @param unique [false, true] whether to return only one value
# @param contrib [false, true] whether to list contributors
# instead of authors
#
# @return [RDF::Term, Array<RDF::Term>] the author(s)
#
def authors_for subject, graph: nil, unique: false, contrib: false
  subject = assert_resource subject

  # pick the predicate families for authors or for contributors
  list_preds, plain_preds =
    contrib ? [CONTRIB_LIST, CONTRIB] : [AUTHOR_LIST, AUTHOR]

  # explicitly ordered people come first, from any author list found
  ordered = list_preds.flat_map do |pred|
    head = first_object([subject, pred, nil])
    # note this use of RDF::List is not particularly well-documented
    head ? RDF::List.new(subject: head, graph: self).to_a : []
  end

  # now try various permutations of the author/contributor predicate
  unordered = plain_preds.flat_map do |pred|
    query([subject, pred, nil]).objects
  end

  # XXX maybe pass in some parameters to this??
  by_label = cmp_label

  # the unordered ones are sorted by label and tacked on after the
  # explicitly ordered ones
  authors = ordered + unordered.uniq.sort(&by_label)

  # note "unique" just means give me the first author; there may be
  # duplicates from authors being in both lists so we still `uniq`
  unique ? authors.first : authors.uniq
end
# Return the terminal replacements (as in, replacements that
# themselves have not been replaced) for the given subject, if
# any.
#
# @param subject [RDF::Resource] the entity whose replacements we
# are looking for
# @param graph [nil, RDF::Resource] a named graph identifier
# @param published [false, true] whether to constrain the search
# to published resources
# @param published [true, false]
# @param noop [false, true] whether to return unconditionally
#
# @return [Array<RDF::Resource>] the replacements, if any
#
def replacements_for subject, graph: nil, published: true, noop: false
  # Walks the dct:replaces / dct:isReplacedBy chain outward from
  # `subject` and returns the terminal replacements (those that have
  # not themselves been replaced). With `published`, the result is
  # narrowed to published resources, falling back along the chain
  # until some published resource is found. `noop` is accepted but
  # not used here.
  #
  # XXX TODO this thing needs to be coded to handle fragments;
  # still not sure what to do about fragments
  subject = assert_resource subject
  graph   = assert_resources graph

  # `seen` is a hash mapping resources to publication status and
  # subsequent replacements. it collects all the resources in the
  # replacement chain in :fwd (replaces) and :rev (replaced-by)
  # members, along with a boolean :pub. `seen` also performs a
  # duty as cycle-breaking sentinel.
  seen  = {}
  queue = [subject]

  while (test = queue.shift)
    # fwd is "replaces", rev is "replaced by"
    entry = seen[test] ||= {
      pub: published?(test), fwd: Set[], rev: Set[] }

    # look up the replacements of the resource just dequeued, not of
    # the original subject, so the walk continues along the chain
    candidates = (
      subjects_for(RDF::Vocab::DC.replaces, test, graph: graph) +
      objects_for(test, RDF::Vocab::DC.isReplacedBy, graph: graph,
                  only: :resource)).uniq

    candidates.each do |r| # r = replacement
      next if seen.include? r

      # we preemptively create a structure
      seen[r] = { pub: published?(r), fwd: Set[], rev: Set[] }
      seen[r][:fwd] << test
      entry[:rev] << r

      queue << r
    end
  end

  # the terminal resources are the ones nothing replaces
  out = seen.map { |k, v| v[:rev].empty? ? k : nil }.compact - [subject]

  # if we're calling from a published context, we return the
  # (topologically) last published resource(s), even if they are
  # replaced ultimately by unpublished resources.
  if published
    pubout = out.select { |o| seen[o][:pub] }
    # if there is anything left after this, return it
    return pubout unless pubout.empty?

    # otherwise walk backwards: take the union of the :fwd members of
    # `out`, keep the published ones, and repeat until something is
    # found or there is nothing left to walk
    loop do
      # XXX THIS NEEDS A TEST CASE
      out = seen.values_at(*out).map { |v| v[:fwd] }.reduce(:+).to_a
      break if out.empty?
      pubout = out.select { |o| seen[o][:pub] }
      return pubout unless pubout.empty?
    end
  end

  out
end
# Return the dates associated with the subject.
#
# @param subject [RDF::Resource] the entity whose dates we are
# looking for
# @param graph [nil, RDF::Resource] a named graph identifier
# @param predicate [RDF::URI, Array<RDF::URI>] the predicate(s) to check
# @param datatype [RDF::URI, Array<RDF::URI>] the datatype(s) to check
#
# @return [Array<Date>] the date(time)s, if any
#
def dates_for subject, graph: nil, predicate: RDF::Vocab::DC.date,
    datatype: [RDF::XSD.dateTime, RDF::XSD.date]
  # unwrap every matching literal into its native Ruby value
  values = objects_for(subject, predicate, graph: graph,
                       datatype: datatype, only: :literal) { |o| o.object }

  # keep only what is a Date (which includes DateTime), oldest first
  values.grep(Date).sort.uniq
end
# Return the media types (formats) associated with the subject.
#
# @param subject [RDF::Resource] the entity whose formats we are
# looking for
# @param graph [nil, RDF::Resource] a named graph identifier
# @param predicate [RDF::URI, Array<RDF::URI>] the predicate(s) to check
# @param datatype [RDF::URI, Array<RDF::URI>] the datatype(s) to check
#
# @return [Array<MimeMagic>] the media types, if any
#
def formats_for subject, graph: nil, predicate: RDF::Vocab::DC.format,
    datatype: [RDF::XSD.token]
  types = objects_for(subject, predicate,
                      graph: graph, datatype: datatype, only: :literal) do |o|
    token = o.object.to_s.strip.downcase
    # only something with a slash in it can be a media type
    MimeMagic.new(token) if token.include? '/'
  end

  # drop the rejects, then sort and de-duplicate
  types.compact.sort.uniq
end
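# Return a Hash containing common values useful for ranking
# subjects. Includes whether the subject is published, circulated,
# replaced, or retired, along with its creation and modification
# dates.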
#
# @param subject [RDF::Resource] a subject node
# @param graph [nil, RDF::Resource] an optional graph identifier
# @param date [nil,Date,DateTime] a default date
# @param ints [false, true] whether to represent booleans as integers
#
# @return [Hash] a data structure for ranking
#
def ranking_data_for subject, graph: nil, date: nil, ints: false
  # fallback date for subjects that have no dates of their own
  date ||= DateTime.new

  # the boolean part of the structure
  flags = {
    published:  published?(subject, graph: graph, circulated: false),
    circulated: published?(subject, graph: graph, circulated: true),
    replaced:   replaced?(subject, graph: graph),
    retired:    retired?(subject, graph: graph),
  }

  # the latest creation date and the latest plain date, if any
  ctime = dates_for(subject, graph: graph,
                    predicate: RDF::Vocab::DC.created).last || date
  mtime = dates_for(subject, graph: graph).last || date

  # convert to integers lol
  if ints
    # XXX H88888888 >:|
    bits  = { false => 0, true => 1 }
    flags = flags.transform_values { |flag| bits[flag] }
  end

  flags.merge(ctime: ctime, mtime: mtime)
end
private
# the fragment spec determines the kinds of types which are
# always considered fragments (rather than full documents)
#
# sparql-based fragments are different because they relate many
# fragment classes to many document classes via many property
# paths, and so we are ultimately testing the cartesian product here.
# Default fragment spec. Each row has three members which, going by
# the comment above and the values below, appear to be: the candidate
# fragment classes, the property paths to follow, and the host
# document classes (TODO confirm against the code that consumes this).
# `SAO::Reverse` wraps a property so it is followed in reverse.
FRAGMENTS = [
[
[RDF::Vocab::BIBO.DocumentPart],
[SAO::Reverse.new(RDF::Vocab::DC.hasPart), RDF::Vocab::DC.isPartOf],
[RDF::Vocab::FOAF.Document],
],
[
[RDF::RDFS.Resource],
[RDF::Vocab::FOAF.isPrimaryTopicOf,
SAO::Reverse.new(RDF::Vocab::DC.hasPart), RDF::Vocab::DC.isPartOf],
# NOTE(review): empty third member; presumably "no constraint on the
# document class" -- confirm against the consumer
[],
],
]
# these are all the types unambiguously considered to be "documents"
DOCUMENTS = [RDF::Vocab::FOAF.Document].freeze
def expand_documents docs
  # at least one non-blank vocabulary term is required
  resources = assert_resources(docs, blank: false, empty: false, vocab: true)
  # then fan the classes out, descending the type hierarchy
  type_strata(resources, descend: true)
end
# One coercion per position in a fragment spec row. Position 1 holds
# property paths and is entailed; every other position holds
# resources that are coerced to vocabulary terms. The lambdas are run
# with `instance_exec`, so the helpers resolve against the object
# doing the expanding.
FRAGMENT_COERCIONS = [
  -> x { coerce_resources(x, as: :term) },
  -> x { entail_property_path x },
  -> x { coerce_resources(x, as: :term) },
  -> x { coerce_resources(x, as: :term) },
]

# Expand a fragment spec: every row comes back as four sets, one per
# coercion above (a position the row does not have is coerced from nil).
def expand_fragments_sparql spec
  spec.map do |row|
    coerced = FRAGMENT_COERCIONS.each_with_index.map do |coerce, i|
      instance_exec(row[i], &coerce)
    end
    coerced.map { |members| members.to_set }
  end
end
public
# Returns the spec by which document fragments are determined.
# Takes the form of a hash where the keys are RDF types and the
# values are also hashes where the keys are predicates, and the
# values are flags as to whether the predicate is to be taken as
# a reverse relation.
#
# @return [Hash{RDF::URI=>Hash{RDF::URI=>false,true}}]
#
def fragment_spec
  # expand the built-in FRAGMENTS lazily, the first time anyone asks
  @fragments = expand_fragments_sparql(FRAGMENTS) unless @fragments
  @fragments
end
# Set a new fragment spec.
#
# @param spec [Hash{RDF::URI=>Hash{RDF::URI=>false,true}}] a
# fragment spec
#
# @return [Hash{RDF::URI=>Hash{RDF::URI=>false,true}}] an
# expando'd spec, with all equivalent classes and properties
# dereferenced.
#
def fragment_spec= spec
  # expand before replacing the current spec
  expanded = expand_fragments_sparql spec
  @fragments = expanded
end
private
# XXX YO MAYBE REIN IN THE CACHES? lol
# host document cache
# Each accessor below lazily builds its own LRU cache, sized by the
# `cache_limit` in force at the time of first use.

# host document cache
def hcache
  @hcache = Intertwingler::Util::LRU.new(capacity: cache_limit) unless @hcache
  @hcache
end

# term cache
def tcache
  @tcache = Intertwingler::Util::LRU.new(capacity: cache_limit) unless @tcache
  @tcache
end

# type strata cache
def tscache
  @tscache = Intertwingler::Util::LRU.new(capacity: cache_limit) unless @tscache
  @tscache
end

# type strata descending cache
def tdcache
  @tdcache = Intertwingler::Util::LRU.new(capacity: cache_limit) unless @tdcache
  @tdcache
end

# property set cache
def pcache
  @pcache = Intertwingler::Util::LRU.new(capacity: cache_limit) unless @pcache
  @pcache
end

# inverseOf cache
def icache
  @icache = Intertwingler::Util::LRU.new(capacity: cache_limit) unless @icache
  @icache
end

# equivalents cache
def eqcache
  @eqcache = Intertwingler::Util::LRU.new(capacity: cache_limit) unless @eqcache
  @eqcache
end

# subproperty/class cache
def sbcache
  @sbcache = Intertwingler::Util::LRU.new(capacity: cache_limit) unless @sbcache
  @sbcache
end

# superproperty/class cache
def sucache
  @sucache = Intertwingler::Util::LRU.new(capacity: cache_limit) unless @sucache
  @sucache
end
public
def cache_limit
  # unbounded until somebody says otherwise
  @cache_limit = Float::INFINITY unless @cache_limit
  @cache_limit
end
def cache_limit= limit
  # resize every cache (building any that don't exist yet) and then
  # remember the limit for caches created later
  caches = [hcache, tcache, tscache, tdcache, pcache, icache,
            eqcache, sbcache, sucache]
  caches.each { |cache| cache.capacity = limit }
  @cache_limit = limit
end
def flush_cache
  # empty every cache; there is nothing useful to return
  [hcache, tcache, tscache, tdcache, pcache, icache,
   eqcache, sbcache, sucache].each(&:clear)
  nil
end
private
# the two root classes, rdfs:Resource and owl:Thing
# NOTE(review): presumably used to recognize "no more specific type";
# the code that uses this lies outside the visible part of the file
BASE_TYPES = [RDF::RDFS.Resource, RDF::OWL.Thing].freeze
def host_for_internal subject, seen = Set[], graph: nil,
published: false, circulated: false, force: false,
documents: nil, fragments: nil
# caching manoeuvre
key = [subject.to_s, graph.sort, published]