@@ -144,6 +144,7 @@ module Private
144
144
PEREFERENCE_PATTERN = /#{ PEREFERENCE } /um
145
145
TAG_PATTERN = /((?>#{ QNAME_STR } ))\s */um
146
146
CLOSE_PATTERN = /(#{ QNAME_STR } )\s *>/um
147
+ EQUAL_PATTERN = /\s *=\s */um
147
148
ATTLISTDECL_END = /\s +#{ NAME } (?:#{ ATTDEF } )*\s *>/um
148
149
NAME_PATTERN = /#{ NAME } /um
149
150
GEDECL_PATTERN = "\\ s+#{ NAME } \\ s+#{ ENTITYDEF } \\ s*>"
@@ -168,6 +169,7 @@ def initialize( source )
168
169
@entity_expansion_limit = Security . entity_expansion_limit
169
170
@entity_expansion_text_limit = Security . entity_expansion_text_limit
170
171
@source . ensure_buffer
172
+ @version = nil
171
173
end
172
174
173
175
def add_listener ( listener )
@@ -280,7 +282,7 @@ def pull_event
280
282
return [ :comment , process_comment ]
281
283
elsif @source . match? ( "DOCTYPE" , true )
282
284
base_error_message = "Malformed DOCTYPE"
283
- unless @source . match? ( / \s +/um , true )
285
+ unless @source . skip_spaces
284
286
if @source . match? ( ">" )
285
287
message = "#{ base_error_message } : name is missing"
286
288
else
@@ -290,7 +292,7 @@ def pull_event
290
292
raise REXML ::ParseException . new ( message , @source )
291
293
end
292
294
name = parse_name ( base_error_message )
293
- @source . match? ( / \s */um , true ) # skip spaces
295
+ @source . skip_spaces
294
296
if @source . match? ( "[" , true )
295
297
id = [ nil , nil , nil ]
296
298
@document_status = :in_doctype
@@ -306,7 +308,7 @@ def pull_event
306
308
# For backward compatibility
307
309
id [ 1 ] , id [ 2 ] = id [ 2 ] , nil
308
310
end
309
- @source . match? ( / \s */um , true ) # skip spaces
311
+ @source . skip_spaces
310
312
if @source . match? ( "[" , true )
311
313
@document_status = :in_doctype
312
314
elsif @source . match? ( ">" , true )
@@ -319,7 +321,7 @@ def pull_event
319
321
end
320
322
args = [ :start_doctype , name , *id ]
321
323
if @document_status == :after_doctype
322
- @source . match? ( / \s */um , true )
324
+ @source . skip_spaces
323
325
@stack << [ :end_doctype ]
324
326
end
325
327
return args
@@ -330,7 +332,7 @@ def pull_event
330
332
end
331
333
end
332
334
if @document_status == :in_doctype
333
- @source . match? ( / \s */um , true ) # skip spaces
335
+ @source . skip_spaces
334
336
start_position = @source . position
335
337
if @source . match? ( "<!" , true )
336
338
if @source . match? ( "ELEMENT" , true )
@@ -391,7 +393,7 @@ def pull_event
391
393
return [ :attlistdecl , element , pairs , contents ]
392
394
elsif @source . match? ( "NOTATION" , true )
393
395
base_error_message = "Malformed notation declaration"
394
- unless @source . match? ( / \s +/um , true )
396
+ unless @source . skip_spaces
395
397
if @source . match? ( ">" )
396
398
message = "#{ base_error_message } : name is missing"
397
399
else
@@ -404,7 +406,7 @@ def pull_event
404
406
id = parse_id ( base_error_message ,
405
407
accept_external_id : true ,
406
408
accept_public_id : true )
407
- @source . match? ( / \s */um , true ) # skip spaces
409
+ @source . skip_spaces
408
410
unless @source . match? ( ">" , true )
409
411
message = "#{ base_error_message } : garbage before end >"
410
412
raise REXML ::ParseException . new ( message , @source )
@@ -425,7 +427,7 @@ def pull_event
425
427
end
426
428
end
427
429
if @document_status == :after_doctype
428
- @source . match? ( / \s */um , true )
430
+ @source . skip_spaces
429
431
end
430
432
begin
431
433
start_position = @source . position
@@ -642,6 +644,10 @@ def need_source_encoding_update?(xml_declaration_encoding)
642
644
true
643
645
end
644
646
647
# Maps an endian-specific UTF-16 encoding name onto the generic "UTF-16"
# label.
#
# Returns "UTF-16" when +xml_declaration_encoding+ is exactly "UTF-16BE" or
# "UTF-16LE" (compared case-insensitively) and nil for any other value.
def normalize_xml_declaration_encoding(xml_declaration_encoding)
  # \A and \z anchor the whole string, so plain "UTF-16" or "UTF-16BE-foo"
  # do not qualify.
  /\AUTF-16(?:BE|LE)\z/i.match?(xml_declaration_encoding) ? "UTF-16" : nil
end
650
+
645
651
def parse_name ( base_error_message )
646
652
md = @source . match ( Private ::NAME_PATTERN , true )
647
653
unless md
@@ -735,37 +741,85 @@ def process_comment
735
741
736
742
# Parses a processing instruction starting at its target name.
# NOTE(review): presumably the caller has already consumed the leading "<?" -
# confirm against the call site.
#
# When the target is exactly "xml" the work is delegated to xml_declaration
# and its result is returned unchanged. Otherwise returns
# [:processing_instruction, name, content], where content is the text
# between the whitespace that follows the target and the closing "?>", or
# nil when the target is immediately followed by "?>".
#
# Raises ParseException when the closing "?>" is missing.
def process_instruction
  name = parse_name("Malformed XML: Invalid processing instruction node")
  return xml_declaration if name == "xml"

  # PITarget
  if @source.skip_spaces # e.g. <?name content?>
    start_position = @source.position
    content = @source.read_until("?>")
    unless content.chomp!("?>")
      # The text read does not end with "?>": rewind to where the content
      # started before reporting the error.
      @source.position = start_position
      raise ParseException.new("Malformed XML: Unclosed processing instruction: <#{name}>", @source)
    end
  else # e.g. <?name?>
    content = nil
    unless @source.match?("?>", true)
      raise ParseException.new("Malformed XML: Unclosed processing instruction: <#{name}>", @source)
    end
  end
  [:processing_instruction, name, content]
end
763
+
764
# Parses the remainder of an XML declaration after the "xml" target and
# returns [:xmldecl, version, encoding, standalone].
#
# The pseudo-attributes are read in the fixed order version, encoding,
# standalone; version is required, the other two are optional. The parsed
# version is stored in @version, which is also how a second declaration is
# detected. When the declaration carries no encoding, the encoding slot is
# filled from normalize_xml_declaration_encoding(@source.encoding).
#
# Raises ParseException when the declaration is duplicated, is not at the
# start of the document (@document_status already set), lacks the spaces
# before version, lacks version, has a standalone value other than "yes" or
# "no", or is not closed by "?>".
def xml_declaration
  unless @version.nil?
    raise ParseException.new("Malformed XML: XML declaration is duplicated", @source)
  end
  if @document_status
    raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
  end
  unless @source.skip_spaces
    raise ParseException.new("Malformed XML: XML declaration misses spaces before version", @source)
  end
  unless @source.match?("version", true)
    raise ParseException.new("Malformed XML: XML declaration misses version", @source)
  end

  encoding = nil
  standalone = nil

  @version = parse_attribute_value_with_equal("xml")
  unless @source.skip_spaces
    # Nothing separates the version value from what follows, so the only
    # legal continuation is the end of the declaration.
    unless @source.match?("?>", true)
      raise ParseException.new("Malformed XML: Unclosed XML declaration", @source)
    end
    encoding = normalize_xml_declaration_encoding(@source.encoding)
    return [:xmldecl, @version, encoding, nil] # e.g. <?xml version="1.0"?>
  end

  if @source.match?("encoding", true)
    encoding = parse_attribute_value_with_equal("xml")
    unless @source.skip_spaces
      unless @source.match?("?>", true)
        raise ParseException.new("Malformed XML: Unclosed XML declaration", @source)
      end
      if need_source_encoding_update?(encoding)
        @source.encoding = encoding
      end
      encoding ||= normalize_xml_declaration_encoding(@source.encoding)
      return [:xmldecl, @version, encoding, nil] # e.g. <?xml version="1.1" encoding="UTF-8"?>
    end
  end

  if @source.match?("standalone", true)
    standalone = parse_attribute_value_with_equal("xml")
    case standalone
    when "yes", "no"
      # Accepted values; nothing to do.
    else
      raise ParseException.new("Malformed XML: XML declaration standalone is not yes or no : <#{standalone}>", @source)
    end
  end
  @source.skip_spaces
  unless @source.match?("?>", true)
    raise ParseException.new("Malformed XML: Unclosed XML declaration", @source)
  end

  if need_source_encoding_update?(encoding)
    @source.encoding = encoding
  end
  encoding ||= normalize_xml_declaration_encoding(@source.encoding)

  # e.g. <?xml version="1.0" ?>
  #      <?xml version="1.1" encoding="UTF-8" ?>
  #      <?xml version="1.1" standalone="yes"?>
  #      <?xml version="1.1" encoding="UTF-8" standalone="yes" ?>
  [:xmldecl, @version, encoding, standalone]
end
770
824
771
825
if StringScanner ::Version < "3.1.1"
@@ -787,6 +841,25 @@ def scan_quote
787
841
end
788
842
end
789
843
844
# Reads an equal sign followed by a quoted value from the current source
# position and returns the text between the quotes.
#
# +name+ is used only to label the error messages.
#
# Raises REXML::ParseException when the equal sign, the opening quote, or
# the matching closing quote is missing.
def parse_attribute_value_with_equal(name)
  unless @source.match?(Private::EQUAL_PATTERN, true)
    message = "Missing attribute equal: <#{name}>"
    raise REXML::ParseException.new(message, @source)
  end
  quote = scan_quote
  unless quote
    message = "Missing attribute value start quote: <#{name}>"
    raise REXML::ParseException.new(message, @source)
  end
  start_position = @source.position
  value = @source.read_until(quote)
  unless value.chomp!(quote)
    # The text read does not end with the opening quote character: rewind
    # to the start of the value before reporting the error.
    @source.position = start_position
    message = "Missing attribute value end quote: <#{name}>: <#{quote}>"
    raise REXML::ParseException.new(message, @source)
  end
  value
end
862
+
790
863
def parse_attributes ( prefixes )
791
864
attributes = { }
792
865
expanded_names = { }
@@ -801,23 +874,8 @@ def parse_attributes(prefixes)
801
874
name = match [ 1 ]
802
875
prefix = match [ 2 ]
803
876
local_part = match [ 3 ]
804
-
805
- unless @source . match? ( /\s *=\s */um , true )
806
- message = "Missing attribute equal: <#{ name } >"
807
- raise REXML ::ParseException . new ( message , @source )
808
- end
809
- unless quote = scan_quote
810
- message = "Missing attribute value start quote: <#{ name } >"
811
- raise REXML ::ParseException . new ( message , @source )
812
- end
813
- start_position = @source . position
814
- value = @source . read_until ( quote )
815
- unless value . chomp! ( quote )
816
- @source . position = start_position
817
- message = "Missing attribute value end quote: <#{ name } >: <#{ quote } >"
818
- raise REXML ::ParseException . new ( message , @source )
819
- end
820
- @source . match? ( /\s */um , true )
877
+ value = parse_attribute_value_with_equal ( name )
878
+ @source . skip_spaces
821
879
if prefix == "xmlns"
822
880
if local_part == "xml"
823
881
if value != Private ::XML_PREFIXED_NAMESPACE
0 commit comments