Skip to content

Commit 6ba286c

Browse files
authored
Reject XML with no root element as invalid XML (#291)
## Why? GitHub: fix #289 We must reject all XML documents that are not well-formed: https://www.w3.org/TR/2006/REC-xml11-20060816/#proc-types > Validating and non-validating processors alike MUST report violations of this specification's well-formedness constraints in the content of the [document entity](https://www.w3.org/TR/2006/REC-xml11-20060816/#dt-docent) and any other [parsed entities](https://www.w3.org/TR/2006/REC-xml11-20060816/#dt-parsedent) that they read. An XML document with no root element is not well-formed because `document` requires exactly one `element`: https://www.w3.org/TR/2006/REC-xml11-20060816/#sec-well-formed > [1] document ::= ( [prolog](https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-prolog) [element](https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-element) [Misc](https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-Misc)* ) - ( [Char](https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-Char)* [RestrictedChar](https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-RestrictedChar) [Char](https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-Char)* )
1 parent b5b148e commit 6ba286c

File tree

10 files changed

+66
-26
lines changed

10 files changed

+66
-26
lines changed

lib/rexml/parsers/baseparser.rb

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,11 @@ def pull_event
266266
path = "/" + @tags.join("/")
267267
raise ParseException.new("Missing end tag for '#{path}'", @source)
268268
end
269+
270+
unless @document_status == :in_element
271+
raise ParseException.new("Malformed XML: No root element", @source)
272+
end
273+
269274
return [ :end_document ]
270275
end
271276
return @stack.shift if @stack.size > 0

test/parse/test_attribute_list_declaration.rb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@ class TestParseAttributeListDeclaration < Test::Unit::TestCase
1010
def test_linear_performance_space
1111
seq = [10000, 50000, 100000, 150000, 200000]
1212
assert_linear_performance(seq, rehearsal: 10) do |n|
13-
REXML::Document.new("<!DOCTYPE schema SYSTEM \"foo.dtd\" [<!ATTLIST " +
13+
REXML::Document.new("<!DOCTYPE root SYSTEM \"foo.dtd\" [<!ATTLIST " +
1414
" " * n +
15-
" root v CDATA #FIXED \"test\">]>")
15+
" root v CDATA #FIXED \"test\">]><root/>")
1616
end
1717
end
1818

@@ -23,7 +23,7 @@ def test_linear_performance_tab_and_gt
2323
"\t" * n +
2424
"root value CDATA \"" +
2525
">" * n +
26-
"\">]>")
26+
"\">]><root/>")
2727
end
2828
end
2929
end

test/parse/test_comment.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ def test_after_root
174174
def test_linear_performance_top_level_gt
175175
seq = [10000, 50000, 100000, 150000, 200000]
176176
assert_linear_performance(seq, rehearsal: 10) do |n|
177-
REXML::Document.new('<!-- ' + ">" * n + ' -->')
177+
REXML::Document.new('<!-- ' + ">" * n + ' --><a/>')
178178
end
179179
end
180180

test/parse/test_element.rb

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,45 @@ def parse(xml)
1212
end
1313

1414
class TestInvalid < self
15+
def test_top_level_no_tag
16+
exception = assert_raise(REXML::ParseException) do
17+
parse("")
18+
end
19+
assert_equal(<<-DETAIL.chomp, exception.to_s)
20+
Malformed XML: No root element
21+
Line: 0
22+
Position: 0
23+
Last 80 unconsumed characters:
24+
25+
DETAIL
26+
end
27+
28+
def test_top_level_no_tag_with_xml_declaration
29+
exception = assert_raise(REXML::ParseException) do
30+
parse("<?xml version='1.0'?>")
31+
end
32+
assert_equal(<<-DETAIL.chomp, exception.to_s)
33+
Malformed XML: No root element
34+
Line: 1
35+
Position: 21
36+
Last 80 unconsumed characters:
37+
38+
DETAIL
39+
end
40+
41+
def test_top_level_no_tag_with_comment
42+
exception = assert_raise(REXML::ParseException) do
43+
parse("<!-- comment -->")
44+
end
45+
assert_equal(<<-DETAIL.chomp, exception.to_s)
46+
Malformed XML: No root element
47+
Line: 1
48+
Position: 16
49+
Last 80 unconsumed characters:
50+
51+
DETAIL
52+
end
53+
1554
def test_top_level_end_tag
1655
exception = assert_raise(REXML::ParseException) do
1756
parse("</a>")

test/parse/test_entity_declaration.rb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -523,7 +523,7 @@ def test_linear_performance_entity_value_gt
523523
assert_linear_performance(seq, rehearsal: 10) do |n|
524524
REXML::Document.new("<!DOCTYPE rubynet [<!ENTITY rbconfig.ruby_version \"" +
525525
">" * n +
526-
"\">]>")
526+
"\">]><rubynet/>")
527527
end
528528
end
529529

@@ -532,7 +532,7 @@ def test_linear_performance_entity_value_gt_right_bracket
532532
assert_linear_performance(seq, rehearsal: 10) do |n|
533533
REXML::Document.new("<!DOCTYPE rubynet [<!ENTITY rbconfig.ruby_version \"" +
534534
">]" * n +
535-
"\">]>")
535+
"\">]><rubynet/>")
536536
end
537537
end
538538

@@ -541,7 +541,7 @@ def test_linear_performance_system_literal_in_system_gt_right_bracket
541541
assert_linear_performance(seq, rehearsal: 10) do |n|
542542
REXML::Document.new("<!DOCTYPE rubynet [<!ENTITY rbconfig.ruby_version SYSTEM \"" +
543543
">]" * n +
544-
"\">]>")
544+
"\">]><rubynet/>")
545545
end
546546
end
547547

@@ -550,7 +550,7 @@ def test_linear_performance_system_literal_in_public_gt_right_bracket
550550
assert_linear_performance(seq, rehearsal: 10) do |n|
551551
REXML::Document.new("<!DOCTYPE rubynet [<!ENTITY rbconfig.ruby_version PUBLIC \"pubid-literal\" \"" +
552552
">]" * n +
553-
"\">]>")
553+
"\">]><rubynet/>")
554554
end
555555
end
556556
end

test/parse/test_processing_instruction.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -237,14 +237,14 @@ def test_content_question
237237
def test_linear_performance_gt
238238
seq = [10000, 50000, 100000, 150000, 200000]
239239
assert_linear_performance(seq, rehearsal: 10) do |n|
240-
REXML::Document.new("<?name content " + ">" * n + " ?>")
240+
REXML::Document.new("<?name content " + ">" * n + " ?><a/>")
241241
end
242242
end
243243

244244
def test_linear_performance_tab
245245
seq = [10000, 50000, 100000, 150000, 200000]
246246
assert_linear_performance(seq, rehearsal: 10) do |n|
247-
REXML::Document.new("<?name" + "\t" * n + "version=\"1.0\" > ?>")
247+
REXML::Document.new("<?name" + "\t" * n + "version=\"1.0\" > ?><a/>")
248248
end
249249
end
250250
end

test/test_contrib.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -472,7 +472,7 @@ def test_maintain_dtd
472472
<!ENTITY % extern-packages SYSTEM "../../common-declarations.dtd">
473473
%extern-packages;
474474
%extern-common;
475-
]>}
475+
]><ivattacks/>}
476476
doc = Document.new( src )
477477
doc.write( out="" )
478478
src = src.tr('"', "'")

test/test_core.rb

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,7 @@ def test_instruction
329329
REXML::Formatters::Default.new.write( instruction, out = "" )
330330
assert_equal(source, out)
331331

332-
d = Document.new( source )
332+
d = Document.new( source + "<a/>")
333333
instruction2 = d[0]
334334
assert_equal(instruction.to_s, instruction2.to_s)
335335

@@ -875,7 +875,7 @@ def test_entities
875875
def test_element_decl
876876
element_decl = Source.new("<!DOCTYPE foo [
877877
<!ELEMENT bar (#PCDATA)>
878-
]>")
878+
]><foo/>")
879879
doc = Document.new( element_decl )
880880
d = doc[0]
881881
assert_equal("<!ELEMENT bar (#PCDATA)>", d.to_s.split(/\n/)[1].strip)
@@ -1329,7 +1329,7 @@ def test_ticket_53
13291329
end
13301330

13311331
def test_ticket_52
1332-
source = "<!-- this is a single line comment -->"
1332+
source = "<!-- this is a single line comment --><a/>"
13331333
d = REXML::Document.new(source)
13341334
d.write(k="")
13351335
assert_equal( source, k )
@@ -1408,10 +1408,10 @@ def test_ticket_48_part_II
14081408
end
14091409

14101410
def test_ticket_88
1411-
doc = REXML::Document.new("<?xml version=\"1.0\" encoding=\"shift_jis\"?>")
1412-
assert_equal("<?xml version='1.0' encoding='SHIFT_JIS'?>", doc.to_s)
1413-
doc = REXML::Document.new("<?xml version = \"1.0\" encoding = \"shift_jis\"?>")
1414-
assert_equal("<?xml version='1.0' encoding='SHIFT_JIS'?>", doc.to_s)
1411+
doc = REXML::Document.new("<?xml version=\"1.0\" encoding=\"shift_jis\"?><a/>")
1412+
assert_equal("<?xml version='1.0' encoding='SHIFT_JIS'?><a/>", doc.to_s)
1413+
doc = REXML::Document.new("<?xml version = \"1.0\" encoding = \"shift_jis\"?><a/>")
1414+
assert_equal("<?xml version='1.0' encoding='SHIFT_JIS'?><a/>", doc.to_s)
14151415
end
14161416

14171417
def test_ticket_85
@@ -1550,10 +1550,6 @@ def test_ticket_138
15501550
REXML::Document.new(doc.root.to_s).root.attributes.to_h)
15511551
end
15521552

1553-
def test_empty_doc
1554-
assert(REXML::Document.new('').children.empty?)
1555-
end
1556-
15571553
private
15581554
def attribute(name, value)
15591555
REXML::Attribute.new(name, value)

test/test_document.rb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -151,11 +151,11 @@ def test_tag_in_cdata_with_not_ascii_only_but_ascii8bit_encoding_source
151151

152152
def test_xml_declaration_standalone
153153
bug2539 = '[ruby-core:27345]'
154-
doc = REXML::Document.new('<?xml version="1.0" standalone="no" ?>')
154+
doc = REXML::Document.new('<?xml version="1.0" standalone="no" ?><a/>')
155155
assert_equal('no', doc.stand_alone?, bug2539)
156-
doc = REXML::Document.new('<?xml version="1.0" standalone= "no" ?>')
156+
doc = REXML::Document.new('<?xml version="1.0" standalone= "no" ?><a/>')
157157
assert_equal('no', doc.stand_alone?, bug2539)
158-
doc = REXML::Document.new('<?xml version="1.0" standalone= "no" ?>')
158+
doc = REXML::Document.new('<?xml version="1.0" standalone= "no" ?><a/>')
159159
assert_equal('no', doc.stand_alone?, bug2539)
160160
end
161161

test/test_entity.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ def test_constructor
7979
<!ENTITY hatch-pic
8080
SYSTEM "../grafix/OpenHatch.gif"
8181
NDATA gif>
82-
]>}
82+
]><foo/>}
8383

8484
d = REXML::Document.new( source )
8585
dt = d.doctype

0 commit comments

Comments
 (0)