@@ -7,7 +7,7 @@ describe('Metafetch: Final Optimized Tests', () => {
7
7
// Handles to the fixture HTTP servers used by this suite; every one of them
// is closed again in the `after` hook.
let serverInvalidAssets: Server;
let serverUaEcho: Server;
let serverEmptyBody: Server;
let serverPrimaryMeta: Server;
let serverBaseTag: Server;
let serverCharset: Server;
let serverFallbackMeta: Server;
let serverAssetFallback: Server;
let serverBaseNoHref: Server;
let serverMalformedAssets: Server;
let serverAmp: Server;
let serverHttp: Server;
let serverJsonLd: Server;
11
11
12
12
before ( ( done ) => {
13
13
serverInvalidAssets = http . createServer ( ( req , res ) => {
@@ -48,14 +48,39 @@ describe('Metafetch: Final Optimized Tests', () => {
48
48
else if ( req . url ?. startsWith ( '/page' ) ) res . setHeader ( 'Content-Type' , 'text/html' ) . end ( '<html><title>T</title></html>' ) ;
49
49
else res . setHeader ( 'Content-Type' , 'application/pdf' ) . end ( '%PDF-1.4' ) ;
50
50
} ) . listen ( 2511 , '127.0.0.1' ) ;
51
- serverHttp . on ( 'listening' , done ) ;
51
// Fixture server for the JSON-LD tests. Every response is served as text/html;
// each known route embeds a different <script type="application/ld+json">
// payload, and any other route gets an empty body.
const jsonLdPages = new Map([
    ['/basic', '<html><head><script type="application/ld+json">{"@context":"https://schema.org","@type":"NewsArticle","headline":"Article Headline"}</script></head></html>'],
    ['/nested', '<html><head><script type="application/ld+json">{"@context":"https://schema.org","author":{"@type":"Person","name":"Jane Doe"}, "unsupported": ["item1", "item2"]}</script></head></html>'],
    ['/malformed', '<html><head><meta name="description" content="Good"><script type="application/ld+json">{ "key": "value", </script></head></html>'],
    ['/multiple', '<html><head><meta name="description" content="A page with two scripts."><script type="application/ld+json">{"@type":"Organization","name":"My Company"}</script><script type="application/ld+json">{"@type":"WebSite","url":"https://example.com"}</script></head></html>'],
    ['/empty', '<html><head><script type="application/ld+json"></script></head></html>'],
    ['/non_object', '<html><head><script type="application/ld+json">"this is a string, not an object"</script></head></html>'],
]);
serverJsonLd = http
    .createServer((req, res) => {
        res.setHeader('Content-Type', 'text/html');
        // Unknown (or missing) URLs fall back to an empty body.
        res.end(jsonLdPages.get(req.url ?? '') ?? '');
    })
    .listen(2512, '127.0.0.1');
// This is the last fixture server started, so the hook completes once it is listening.
serverJsonLd.on('listening', done);
52
77
} ) ;
53
78
54
79
// Teardown: stop every fixture server, in the same order they are declared.
after(() => {
    const fixtureServers = [
        serverInvalidAssets, serverUaEcho, serverEmptyBody,
        serverPrimaryMeta, serverBaseTag, serverCharset,
        serverFallbackMeta, serverAssetFallback, serverBaseNoHref,
        serverMalformedAssets, serverAmp, serverHttp, serverJsonLd,
    ];
    for (const server of fixtureServers) {
        server.close();
    }
});
60
85
61
86
// --- Test Suites ---
@@ -65,7 +90,7 @@ describe('Metafetch: Final Optimized Tests', () => {
65
90
it(`${String(++counter).padStart(2, '0')}. should return a Promise`, () => {
    const pending = new Metafetch().fetch('http://127.0.0.1:2511/page');
    expect(pending).to.be.an.instanceOf(Promise);
    // Only the return type is under test here; the no-op handler keeps a
    // possible rejection from surfacing as an unhandled-rejection warning.
    pending.catch(() => {});
});
70
95
71
96
it ( ( ++ counter ) . toString ( ) . padStart ( 2 , '0' ) + '. should reject with an error for an empty URL' , async ( ) => {
@@ -103,7 +128,7 @@ describe('Metafetch: Final Optimized Tests', () => {
103
128
}
104
129
} ) ;
105
130
} ) ;
106
-
131
+
107
132
describe ( '3. User-Agent Management' , ( ) => {
108
133
let counter = 0 ;
109
134
it ( ( ++ counter ) . toString ( ) . padStart ( 2 , '0' ) + '. should manage the instance user agent correctly' , ( ) => {
@@ -206,4 +231,66 @@ describe('Metafetch: Final Optimized Tests', () => {
206
231
expect ( res . images ) . to . be . an ( 'array' ) . that . is . not . empty ;
207
232
} ) ;
208
233
} ) ;
234
+
235
// Suite 7: JSON-LD extraction. Each case fetches one route of the fixture
// server on port 2512 and checks which `ld:`-prefixed keys appear in `res.meta`.
describe('7. Structured Data (JSON-LD)', () => {
    let counter = 0;
    const instance = new Metafetch();

    // Prefixes a test title with its zero-padded running number ("01. ...").
    const numbered = (label: string): string => `${String(++counter).padStart(2, '0')}. ${label}`;
    // Builds the URL of a route on the JSON-LD fixture server.
    const pageUrl = (route: string): string => `http://127.0.0.1:2512${route}`;
    // True for metadata keys that originate from JSON-LD.
    const isLdKey = (key: string): boolean => key.startsWith('ld:');

    it(numbered('should extract basic, flat JSON-LD data'), async () => {
        const res = await instance.fetch(pageUrl('/basic'));
        const expected = {
            'ld:@context': 'https://schema.org',
            'ld:@type': 'NewsArticle',
            'ld:headline': 'Article Headline',
        };
        expect(res.meta).to.deep.include(expected);
    });

    it(numbered('should extract and flatten nested JSON-LD data'), async () => {
        const res = await instance.fetch(pageUrl('/nested'));
        const expected = {
            'ld:@context': 'https://schema.org',
            'ld:author:@type': 'Person',
            'ld:author:name': 'Jane Doe',
        };
        expect(res.meta).to.deep.include(expected);
        // Arrays are not flattened by the current implementation, so the
        // array-valued 'unsupported' field must not produce a key.
        expect(res.meta).to.not.have.property('ld:unsupported');
    });

    it(numbered('should handle malformed JSON-LD gracefully without crashing'), async () => {
        const res = await instance.fetch(pageUrl('/malformed'));
        // The ordinary <meta> tag on the page is still picked up...
        expect(res.meta!.description).to.equal('Good');
        // ...while the unparsable ld+json script contributes no 'ld:' keys.
        const ldKeys = Object.keys(res.meta!).filter(isLdKey);
        expect(ldKeys).to.be.empty;
    });

    it(numbered('should merge data from multiple JSON-LD scripts'), async () => {
        const res = await instance.fetch(pageUrl('/multiple'));
        // Duplicate keys are overwritten by the later script in the current
        // implementation, hence '@type' comes from the second one.
        const expected = {
            'description': 'A page with two scripts.',
            'ld:@type': 'WebSite',
            'ld:name': 'My Company',
            'ld:url': 'https://example.com',
        };
        expect(res.meta).to.deep.equal(expected);
    });

    it(numbered('should not extract JSON-LD when meta flag is disabled'), async () => {
        const options = { flags: { meta: false } };
        const res = await instance.fetch(pageUrl('/basic'), options);
        expect(res.meta).to.be.undefined;
    });

    it(numbered('should handle an empty JSON-LD script tag'), async () => {
        const res = await instance.fetch(pageUrl('/empty'));
        expect(res.meta).to.be.an('object').that.is.empty;
    });

    it(numbered('should ignore JSON-LD content that is not a JSON object'), async () => {
        const res = await instance.fetch(pageUrl('/non_object'));
        expect(res.meta).to.be.an('object').that.is.empty;
        const ldKeys = Object.keys(res.meta!).filter(isLdKey);
        expect(ldKeys).to.be.empty;
    });
});
209
296
} ) ;
0 commit comments