@@ -290,8 +290,7 @@ def self.compact(input, context, expanded: false, serializer: nil, **options)
290290 def self . flatten ( input , context , expanded : false , serializer : nil , **options )
291291 flattened = [ ]
292292 options = {
293- compactToRelative : true ,
294- extractAllScripts : true
293+ compactToRelative : true
295294 } . merge ( options )
296295
297296 # Expand input to simplify processing
@@ -518,6 +517,8 @@ def self.frame(input, frame, expanded: false, serializer: nil, **options)
518517 # @option options (see #initialize)
519518 # @option options [Boolean] :produceGeneralizedRdf (false)
520519 # If true, output will include statements having blank node predicates, otherwise they are dropped.
520+ # @option options [Boolean] :extractAllScripts (true)
521+ # If set, when given an HTML input without a fragment identifier, extracts all `script` elements with type `application/ld+json` into an array during expansion.
521522 # @raise [JsonLdError]
522523 # @yield statement
523524 # @yieldparam [RDF::Statement] statement
@@ -638,7 +639,7 @@ def self.loadRemoteDocument(url,
638639 options [ :headers ] [ 'Accept' ] . sub ( 'application/ld+json,' ,
639640 "application/ld+json;profile=#{ requestProfile } , application/ld+json;q=0.9," )
640641 end
641- documentLoader . call ( url , **options ) do |remote_doc |
642+ documentLoader . call ( url , extractAllScripts : extractAllScripts , **options ) do |remote_doc |
642643 case remote_doc
643644 when RDF ::Util ::File ::RemoteDocument
644645 # Convert to RemoteDocument
@@ -758,6 +759,28 @@ class << self
758759 alias fromRDF fromRdf
759760 end
760761
##
# Registry of recognized script content types and the loaders that decode
# the text of a matching `script` element into a Hash or an Array of Hashes.
#
# Keys are matched as a *prefix* of the element's `type` attribute, so a
# key of `application/ld+json` also matches
# `application/ld+json;profile=...`.
#
# Every loader is invoked as `loader.call(content, url: url, **options)`.
# The registry is intentionally left unfrozen: {add_script_loader} adds
# entries to it at runtime.
#
# @return [Hash{String => #call}]
SCRIPT_LOADERS = {
  'application/ld+json' => ->(content, url:, **options) do
    validate_input(content, url: url) if options[:validate]
    # Only forward an :adapter option that names a known MultiJson adapter;
    # every other option is passed through untouched.
    mj_opts = options.reject { |key, value| key == :adapter && !MUTLI_JSON_ADAPTERS.include?(value) }
    MultiJson.load(content, **mj_opts)
  end
}

##
# Registers a loader for script elements of the given content type,
# replacing any loader previously registered for that type.
#
# The loader may be given either as a callable object or as a block.
#
# @param [#to_s] type
#   Content type; stored as a String because types are compared with
#   `String#start_with?` and interpolated into XPath expressions.
# @param [#call] loader
#   Invoked as `loader.call(content, url:, **options)`; expected to return
#   a Hash or an Array of Hashes.
# @yield [content, url:, **options] used as the loader when `loader` is omitted
# @return [#call] the registered loader
# @raise [ArgumentError] if no callable loader is supplied
def self.add_script_loader(type, loader = nil, &block)
  loader ||= block
  # Fail at registration time rather than with a NoMethodError while loading HTML.
  raise ArgumentError, "loader for #{type} must respond to #call" unless loader.respond_to?(:call)

  SCRIPT_LOADERS[type.to_s] = loader
end
783+
761784 ##
762785 # Load one or more script tags from an HTML source.
763786 # Unescapes and uncomments input, returns the internal representation
@@ -812,47 +835,52 @@ def self.load_html(input, url:,
812835 element = input . at_xpath ( "//script[@id='#{ id } ']" )
813836 raise JSON ::LD ::JsonLdError ::LoadingDocumentFailed , "No script tag found with id=#{ id } " unless element
814837
815- unless element . attributes [ 'type' ] . to_s . start_with? ( 'application/ld+json' )
838+ script_type = SCRIPT_LOADERS . keys . detect { |type | element . attributes [ 'type' ] . to_s . start_with? ( type ) }
839+ unless script_type
816840 raise JSON ::LD ::JsonLdError ::LoadingDocumentFailed ,
817841 "Script tag has type=#{ element . attributes [ 'type' ] } "
818842 end
819843
820- content = element . inner_html
821- validate_input ( content , url : url ) if options [ :validate ]
822- mj_opts = options . keep_if { |k , v | k != :adapter || MUTLI_JSON_ADAPTERS . include? ( v ) }
823- MultiJson . load ( content , **mj_opts )
844+ loader = SCRIPT_LOADERS [ script_type ]
845+ loader . call ( element . inner_html , url : url , **options )
824846 elsif extractAllScripts
825847 res = [ ]
826- elements = if profile
827- es = input . xpath ( "//script[starts-with(@type, 'application/ld+json;profile=#{ profile } ')]" )
828- # If no profile script, just take a single script without profile
829- es = [ input . at_xpath ( "//script[starts-with(@type, 'application/ld+json')]" ) ] . compact if es . empty?
830- es
831- else
832- input . xpath ( "//script[starts-with(@type, 'application/ld+json')]" )
833- end
834- elements . each do |element |
835- content = element . inner_html
836- validate_input ( content , url : url ) if options [ :validate ]
837- mj_opts = options . keep_if { |k , v | k != :adapter || MUTLI_JSON_ADAPTERS . include? ( v ) }
838- r = MultiJson . load ( content , **mj_opts )
839- if r . is_a? ( Hash )
840- res << r
841- elsif r . is_a? ( Array )
842- res . concat ( r )
848+
849+ SCRIPT_LOADERS . each do |type , loader |
850+ elements = if profile
851+ es = input . xpath ( "//script[starts-with(@type, '#{ type } ;profile=#{ profile } ')]" )
852+ # If no profile script, just take a single script without profile
853+ es = [ input . at_xpath ( "//script[starts-with(@type, '#{ type } ')]" ) ] . compact if es . empty?
854+ es
855+ else
856+ input . xpath ( "//script[starts-with(@type, '#{ type } ')]" )
857+ end
858+ elements . each do |element |
859+ content = element . inner_html
860+ r = loader . call ( content , url : url , extractAllScripts : true , **options )
861+ if r . is_a? ( Hash )
862+ res << r
863+ elsif r . is_a? ( Array )
864+ res . concat ( r )
865+ end
843866 end
844867 end
845868 res
846869 else
847- # Find the first script with type application/ld+json.
848- element = input . at_xpath ( "//script[starts-with(@type, 'application/ld+json;profile=#{ profile } ')]" ) if profile
849- element ||= input . at_xpath ( "//script[starts-with(@type, 'application/ld+json')]" )
850- raise JSON ::LD ::JsonLdError ::LoadingDocumentFailed , "No script tag found" unless element
870+ # Find the first script with a known type
871+ script_type , element = nil , nil
872+ SCRIPT_LOADERS . keys . each do |type |
873+ next if script_type # already found the type
874+ element = input . at_xpath ( "//script[starts-with(@type, '#{ type } ;profile=#{ profile } ')]" ) if profile
875+ element ||= input . at_xpath ( "//script[starts-with(@type, '#{ type } ')]" )
876+ script_type = type if element
877+ end
878+ unless script_type
879+ raise JSON ::LD ::JsonLdError ::LoadingDocumentFailed , "No script tag found" unless element
880+ end
851881
852882 content = element . inner_html
853- validate_input ( content , url : url ) if options [ :validate ]
854- mj_opts = options . keep_if { |k , v | k != :adapter || MUTLI_JSON_ADAPTERS . include? ( v ) }
855- MultiJson . load ( content , **mj_opts )
883+ SCRIPT_LOADERS [ script_type ] . call ( content , url : url , **options )
856884 end
857885 rescue MultiJson ::ParseError => e
858886 raise JSON ::LD ::JsonLdError ::InvalidScriptElement , e . message
0 commit comments