@@ -124,7 +124,7 @@ private String newStringIntern( char[] cbuf, int off, int len )
124124 // private String elValue[];
125125 private int elNamespaceCount [];
126126
127- private String fileEncoding = "UTF8" ;
127+ private String fileEncoding = null ;
128128
129129 /**
130130 * Make sure that we have enough space to keep element stack if passed size. It will always create one additional
@@ -587,8 +587,8 @@ else if ( FEATURE_XML_ROUNDTRIP.equals( name ) )
587587 }
588588 }
589589
590- /**
591- * Unknown properties are <strong>always</strong> returned as false
590+ /**
591+ * Unknown properties are <strong>always</strong> returned as false
592592 */
593593 @ Override
594594 public boolean getFeature ( String name )
@@ -1596,11 +1596,11 @@ else if ( ch == '&' )
15961596 }
15971597 final int oldStart = posStart + bufAbsoluteStart ;
15981598 final int oldEnd = posEnd + bufAbsoluteStart ;
1599- final char [] resolvedEntity = parseEntityRef ();
1599+ parseEntityRef ();
16001600 if ( tokenize )
16011601 return eventType = ENTITY_REF ;
16021602 // check if replacement text can be resolved !!!
1603- if ( resolvedEntity == null )
1603+ if ( resolvedEntityRefCharBuf == BUF_NOT_RESOLVED )
16041604 {
16051605 if ( entityRefName == null )
16061606 {
@@ -1628,7 +1628,7 @@ else if ( ch == '&' )
16281628 }
16291629 // assert usePC == true;
16301630 // write into PC replacement text - do merge for replacement text!!!!
1631- for ( char aResolvedEntity : resolvedEntity )
1631+ for ( char aResolvedEntity : resolvedEntityRefCharBuf )
16321632 {
16331633 if ( pcEnd >= pc .length )
16341634 {
@@ -2675,9 +2675,28 @@ else if ( ch == '\t' || ch == '\n' || ch == '\r' )
26752675 return ch ;
26762676 }
26772677
2678- private char [] charRefOneCharBuf = new char [1 ];
2678+ // state representing that no entity ref has been resolved
2679+ private static final char [] BUF_NOT_RESOLVED = new char [0 ];
2680+
2681+ // predefined entity refs
2682+ private static final char [] BUF_LT = new char [] { '<' };
2683+ private static final char [] BUF_AMP = new char [] { '&' };
2684+ private static final char [] BUF_GT = new char [] { '>' };
2685+ private static final char [] BUF_APO = new char [] { '\'' };
2686+ private static final char [] BUF_QUOT = new char [] { '"' };
26792687
2680- private char [] parseEntityRef ()
2688+ private char [] resolvedEntityRefCharBuf = BUF_NOT_RESOLVED ;
2689+
2690+ /**
2691+ * Parse an entity reference that is either a character reference or one of the predefined named entities.
2692+ *
2693+ * @return the length of the valid found character reference, which may be one of the predefined character reference
2694+ * names (resolvedEntityRefCharBuf contains the replaced chars). Returns the length of the not found entity
2695+ * name, otherwise.
2696+ * @throws XmlPullParserException if invalid XML is detected.
2697+ * @throws IOException if an I/O error is found.
2698+ */
2699+ private int parseCharOrPredefinedEntityRef ()
26812700 throws XmlPullParserException , IOException
26822701 {
26832702 // entity reference http://www.w3.org/TR/2000/REC-xml-20001006#NT-Reference
@@ -2686,6 +2705,8 @@ private char[] parseEntityRef()
26862705 // ASSUMPTION just after &
26872706 entityRefName = null ;
26882707 posStart = pos ;
2708+ int len = 0 ;
2709+ resolvedEntityRefCharBuf = BUF_NOT_RESOLVED ;
26892710 char ch = more ();
26902711 if ( ch == '#' )
26912712 {
@@ -2750,7 +2771,6 @@ else if ( ch >= 'A' && ch <= 'F' )
27502771 ch = more ();
27512772 }
27522773 }
2753- posEnd = pos - 1 ;
27542774
27552775 boolean isValidCodePoint = true ;
27562776 try
@@ -2759,7 +2779,7 @@ else if ( ch >= 'A' && ch <= 'F' )
27592779 isValidCodePoint = isValidCodePoint ( codePoint );
27602780 if ( isValidCodePoint )
27612781 {
2762- charRefOneCharBuf = Character .toChars ( codePoint );
2782+ resolvedEntityRefCharBuf = Character .toChars ( codePoint );
27632783 }
27642784 }
27652785 catch ( IllegalArgumentException e )
@@ -2775,14 +2795,14 @@ else if ( ch >= 'A' && ch <= 'F' )
27752795
27762796 if ( tokenize )
27772797 {
2778- text = newString ( charRefOneCharBuf , 0 , charRefOneCharBuf .length );
2798+ text = newString ( resolvedEntityRefCharBuf , 0 , resolvedEntityRefCharBuf .length );
27792799 }
2780- return charRefOneCharBuf ;
2800+ len = resolvedEntityRefCharBuf . length ;
27812801 }
27822802 else
27832803 {
27842804 // [68] EntityRef ::= '&' Name ';'
2785- // scan anem until ;
2805+ // scan name until ;
27862806 if ( !isNameStartChar ( ch ) )
27872807 {
27882808 throw new XmlPullParserException ( "entity reference names can not start with character '"
@@ -2801,17 +2821,15 @@ else if ( ch >= 'A' && ch <= 'F' )
28012821 + printable ( ch ) + "'" , this , null );
28022822 }
28032823 }
2804- posEnd = pos - 1 ;
28052824 // determine what name maps to
2806- final int len = posEnd - posStart ;
2825+ len = ( pos - 1 ) - posStart ;
28072826 if ( len == 2 && buf [posStart ] == 'l' && buf [posStart + 1 ] == 't' )
28082827 {
28092828 if ( tokenize )
28102829 {
28112830 text = "<" ;
28122831 }
2813- charRefOneCharBuf [0 ] = '<' ;
2814- return charRefOneCharBuf ;
2832+ resolvedEntityRefCharBuf = BUF_LT ;
28152833 // if(paramPC || isParserTokenizing) {
28162834 // if(pcEnd >= pc.length) ensurePC();
28172835 // pc[pcEnd++] = '<';
@@ -2823,17 +2841,15 @@ else if ( len == 3 && buf[posStart] == 'a' && buf[posStart + 1] == 'm' && buf[po
28232841 {
28242842 text = "&" ;
28252843 }
2826- charRefOneCharBuf [0 ] = '&' ;
2827- return charRefOneCharBuf ;
2844+ resolvedEntityRefCharBuf = BUF_AMP ;
28282845 }
28292846 else if ( len == 2 && buf [posStart ] == 'g' && buf [posStart + 1 ] == 't' )
28302847 {
28312848 if ( tokenize )
28322849 {
28332850 text = ">" ;
28342851 }
2835- charRefOneCharBuf [0 ] = '>' ;
2836- return charRefOneCharBuf ;
2852+ resolvedEntityRefCharBuf = BUF_GT ;
28372853 }
28382854 else if ( len == 4 && buf [posStart ] == 'a' && buf [posStart + 1 ] == 'p' && buf [posStart + 2 ] == 'o'
28392855 && buf [posStart + 3 ] == 's' )
@@ -2842,8 +2858,7 @@ else if ( len == 4 && buf[posStart] == 'a' && buf[posStart + 1] == 'p' && buf[po
28422858 {
28432859 text = "'" ;
28442860 }
2845- charRefOneCharBuf [0 ] = '\'' ;
2846- return charRefOneCharBuf ;
2861+ resolvedEntityRefCharBuf = BUF_APO ;
28472862 }
28482863 else if ( len == 4 && buf [posStart ] == 'q' && buf [posStart + 1 ] == 'u' && buf [posStart + 2 ] == 'o'
28492864 && buf [posStart + 3 ] == 't' )
@@ -2852,25 +2867,65 @@ else if ( len == 4 && buf[posStart] == 'q' && buf[posStart + 1] == 'u' && buf[po
28522867 {
28532868 text = "\" " ;
28542869 }
2855- charRefOneCharBuf [0 ] = '"' ;
2856- return charRefOneCharBuf ;
2857- }
2858- else
2859- {
2860- final char [] result = lookuEntityReplacement ( len );
2861- if ( result != null )
2862- {
2863- return result ;
2864- }
2870+ resolvedEntityRefCharBuf = BUF_QUOT ;
28652871 }
2866- if ( tokenize )
2867- text = null ;
2868- return null ;
28692872 }
2873+
2874+ posEnd = pos ;
2875+
2876+ return len ;
2877+ }
2878+
2879+ /**
2880+ * Parse an entity reference inside the DOCDECL section. Only character references and the predefined entities are resolved here; lookuEntityReplacement is not consulted.
2881+ *
2882+ * @throws XmlPullParserException if invalid XML is detected.
2883+ * @throws IOException if an I/O error is found.
2884+ */
2885+ private void parseEntityRefInDocDecl ()
2886+ throws XmlPullParserException , IOException
2887+ {
2888+ parseCharOrPredefinedEntityRef (); // resets posStart and leaves its result in resolvedEntityRefCharBuf
2889+ if (usePC ) {
2890+ posStart --; // include in PC the starting '&' of the entity
2891+ joinPC ();
2892+ }
2893+
2894+ if ( resolvedEntityRefCharBuf != BUF_NOT_RESOLVED )
2895+ return ; // resolved: nothing more to do
2896+ if ( tokenize )
2897+ text = null ; // not resolved: no replacement text is available for the token
2898+ }
2899+
2900+ /**
2901+ * Parse an entity reference inside a tag or attribute. The replacement text is left in resolvedEntityRefCharBuf, which stays BUF_NOT_RESOLVED when neither a character reference, a predefined entity nor lookuEntityReplacement provides one.
2902+ *
2903+ * @throws XmlPullParserException if invalid XML is detected.
2904+ * @throws IOException if an I/O error is found.
2905+ */
2906+ private void parseEntityRef ()
2907+ throws XmlPullParserException , IOException
2908+ {
2909+ final int len = parseCharOrPredefinedEntityRef ();
2910+
2911+ posEnd --; // don't involve the final ';' from the entity in the search
2912+
2913+ if ( resolvedEntityRefCharBuf != BUF_NOT_RESOLVED ) {
2914+ return ; // already resolved as a character reference or predefined entity
2915+ }
2916+
2917+ resolvedEntityRefCharBuf = lookuEntityReplacement ( len ); // fall back to the entity replacement lookup
2918+ if ( resolvedEntityRefCharBuf != BUF_NOT_RESOLVED )
2919+ {
2920+ return ;
2921+ }
2922+ if ( tokenize )
2923+ text = null ; // still not resolved: no replacement text is available for the token
28702924 }
28712925
28722926 /**
2873- * Check if the provided parameter is a valid Char, according to: {@link https://www.w3.org/TR/REC-xml/#NT-Char}
2927+ * Check if the provided parameter is a valid Char. According to
2928+ * <a href="https://www.w3.org/TR/REC-xml/#NT-Char">https://www.w3.org/TR/REC-xml/#NT-Char</a>
28742929 *
28752930 * @param codePoint the numeric value to check
28762931 * @return true if it is a valid numeric character reference. False otherwise.
@@ -2883,8 +2938,6 @@ private static boolean isValidCodePoint( int codePoint )
28832938 }
28842939
28852940 private char [] lookuEntityReplacement ( int entityNameLen )
2886- throws XmlPullParserException , IOException
2887-
28882941 {
28892942 if ( !allStringsInterned )
28902943 {
@@ -2919,7 +2972,7 @@ private char[] lookuEntityReplacement( int entityNameLen )
29192972 }
29202973 }
29212974 }
2922- return null ;
2975+ return BUF_NOT_RESOLVED ;
29232976 }
29242977
29252978 private void parseComment ()
@@ -2977,7 +3030,7 @@ else if (isValidCodePoint( ch ))
29773030 }
29783031 else
29793032 {
2980- throw new XmlPullParserException ( "Illegal character 0x" + Integer .toHexString ((( int ) ch ) ) + " found in comment" , this , null );
3033+ throw new XmlPullParserException ( "Illegal character 0x" + Integer .toHexString (ch ) + " found in comment" , this , null );
29813034 }
29823035 if ( normalizeIgnorableWS )
29833036 {
@@ -3484,7 +3537,8 @@ else if ( ch == '>' && bracketLevel == 0 )
34843537 break ;
34853538 else if ( ch == '&' )
34863539 {
3487- extractEntityRef ();
3540+ extractEntityRefInDocDecl ();
3541+ continue ;
34883542 }
34893543 if ( normalizeIgnorableWS )
34903544 {
@@ -3536,6 +3590,19 @@ else if ( ch == '\n' )
35363590
35373591 }
35383592 posEnd = pos - 1 ;
3593+ text = null ;
3594+ }
3595+
3596+ private void extractEntityRefInDocDecl ()
3597+ throws XmlPullParserException , IOException
3598+ {
3599+ // DOCDECL counterpart of extractEntityRef: parse the reference while preserving the current posStart
3600+ posEnd = pos - 1 ;
3601+
3602+ int prevPosStart = posStart ; // parsing the reference overwrites posStart, so remember it
3603+ parseEntityRefInDocDecl ();
3604+
3605+ posStart = prevPosStart ; // restore the start position saved above
35393606 }
35403607
35413608 private void extractEntityRef ()
@@ -3559,9 +3626,9 @@ private void extractEntityRef()
35593626 }
35603627 // assert usePC == true;
35613628
3562- final char [] resolvedEntity = parseEntityRef ();
3629+ parseEntityRef ();
35633630 // check if replacement text can be resolved !!!
3564- if ( resolvedEntity == null )
3631+ if ( resolvedEntityRefCharBuf == BUF_NOT_RESOLVED )
35653632 {
35663633 if ( entityRefName == null )
35673634 {
@@ -3571,7 +3638,7 @@ private void extractEntityRef()
35713638 + "'" , this , null );
35723639 }
35733640 // write into PC replacement text - do merge for replacement text!!!!
3574- for ( char aResolvedEntity : resolvedEntity )
3641+ for ( char aResolvedEntity : resolvedEntityRefCharBuf )
35753642 {
35763643 if ( pcEnd >= pc .length )
35773644 {
0 commit comments