@@ -135,6 +135,7 @@ class Demangler {
135135 void printDecimalNumber (uint64_t N);
136136 void printBasicType (BasicType);
137137 void printLifetime (uint64_t Index);
138+ void printIdentifier (Identifier Ident);
138139
139140 char look () const ;
140141 char consume ();
@@ -283,8 +284,7 @@ bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
283284 switch (consume ()) {
284285 case ' C' : {
285286 parseOptionalBase62Number (' s' );
286- Identifier Ident = parseIdentifier ();
287- print (Ident.Name );
287+ printIdentifier (parseIdentifier ());
288288 break ;
289289 }
290290 case ' M' : {
@@ -333,7 +333,7 @@ bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
333333 print (NS);
334334 if (!Ident.empty ()) {
335335 print (" :" );
336- print (Ident. Name );
336+ printIdentifier (Ident);
337337 }
338338 print (' #' );
339339 printDecimalNumber (Disambiguator);
@@ -342,7 +342,7 @@ bool Demangler::demanglePath(IsInType InType, LeaveGenericsOpen LeaveOpen) {
342342 // Implementation internal namespaces.
343343 if (!Ident.empty ()) {
344344 print (" ::" );
345- print (Ident. Name );
345+ printIdentifier (Ident);
346346 }
347347 }
348348 break ;
@@ -669,6 +669,8 @@ void Demangler::demangleFnSig() {
669669 print (" C" );
670670 } else {
671671 Identifier Ident = parseIdentifier ();
672+ if (Ident.Punycode )
673+ Error = true ;
672674 for (char C : Ident.Name ) {
673675 // When mangling ABI string, the "-" is replaced with "_".
674676 if (C == ' _' )
@@ -1078,6 +1080,172 @@ void Demangler::printLifetime(uint64_t Index) {
10781080 }
10791081}
10801082
1083+ static inline bool decodePunycodeDigit (char C, size_t &Value) {
1084+ if (isLower (C)) {
1085+ Value = C - ' a' ;
1086+ return true ;
1087+ }
1088+
1089+ if (isDigit (C)) {
1090+ Value = 26 + (C - ' 0' );
1091+ return true ;
1092+ }
1093+
1094+ return false ;
1095+ }
1096+
1097+ static void removeNullBytes (OutputStream &Output, size_t StartIdx) {
1098+ char *Buffer = Output.getBuffer ();
1099+ char *Start = Buffer + StartIdx;
1100+ char *End = Buffer + Output.getCurrentPosition ();
1101+ Output.setCurrentPosition (std::remove (Start, End, ' \0 ' ) - Buffer);
1102+ }
1103+
// Writes the UTF-8 encoding of CodePoint into Output and returns true.
// Returns false, writing nothing, when CodePoint is a surrogate
// (0xD800-0xDFFF) or lies above 0x10FFFF. Only as many bytes as the encoding
// needs (one to four) are written; Output must have room for four.
static inline bool encodeUTF8(size_t CodePoint, char *Output) {
  const bool IsSurrogate = CodePoint >= 0xD800 && CodePoint <= 0xDFFF;
  if (IsSurrogate || CodePoint > 0x10FFFF)
    return false;

  // Single byte: the code point is stored as is.
  if (CodePoint < 0x80) {
    Output[0] = static_cast<char>(CodePoint);
    return true;
  }

  // Multi-byte: pick the lead byte marker and the number of continuation
  // bytes from the magnitude of the code point.
  size_t Trailing;
  size_t LeadMarker;
  if (CodePoint < 0x800) {
    Trailing = 1;
    LeadMarker = 0xC0;
  } else if (CodePoint < 0x10000) {
    Trailing = 2;
    LeadMarker = 0xE0;
  } else {
    Trailing = 3;
    LeadMarker = 0xF0;
  }

  // The lead byte carries the most significant bits; each continuation byte
  // carries the next six bits behind a 0x80 marker.
  Output[0] = static_cast<char>(LeadMarker | (CodePoint >> (6 * Trailing)));
  for (size_t Idx = 1; Idx <= Trailing; ++Idx) {
    const size_t Shift = 6 * (Trailing - Idx);
    Output[Idx] = static_cast<char>(0x80 | ((CodePoint >> Shift) & 0x3F));
  }
  return true;
}
1138+
1139+ // Decodes string encoded using punycode and appends results to Output.
1140+ // Returns true if decoding was successful.
1141+ static bool decodePunycode (StringView Input, OutputStream &Output) {
1142+ size_t OutputSize = Output.getCurrentPosition ();
1143+ size_t InputIdx = 0 ;
1144+
1145+ // Rust uses an underscore as a delimiter.
1146+ size_t DelimiterPos = StringView::npos;
1147+ for (size_t I = 0 ; I != Input.size (); ++I)
1148+ if (Input[I] == ' _' )
1149+ DelimiterPos = I;
1150+
1151+ if (DelimiterPos != StringView::npos) {
1152+ // Copy basic code points before the last delimiter to the output.
1153+ for (; InputIdx != DelimiterPos; ++InputIdx) {
1154+ char C = Input[InputIdx];
1155+ if (!isValid (C))
1156+ return false ;
1157+ // Code points are padded with zeros while decoding is in progress.
1158+ char UTF8[4 ] = {C};
1159+ Output += StringView (UTF8, UTF8 + 4 );
1160+ }
1161+ // Skip over the delimiter.
1162+ ++InputIdx;
1163+ }
1164+
1165+ size_t Base = 36 ;
1166+ size_t Skew = 38 ;
1167+ size_t Bias = 72 ;
1168+ size_t N = 0x80 ;
1169+ size_t TMin = 1 ;
1170+ size_t TMax = 26 ;
1171+ size_t Damp = 700 ;
1172+
1173+ auto Adapt = [&](size_t Delta, size_t NumPoints) {
1174+ Delta /= Damp;
1175+ Delta += Delta / NumPoints;
1176+ Damp = 2 ;
1177+
1178+ size_t K = 0 ;
1179+ while (Delta > (Base - TMin) * TMax / 2 ) {
1180+ Delta /= Base - TMin;
1181+ K += Base;
1182+ }
1183+ return K + (((Base - TMin + 1 ) * Delta) / (Delta + Skew));
1184+ };
1185+
1186+ // Main decoding loop.
1187+ for (size_t I = 0 ; InputIdx != Input.size (); I += 1 ) {
1188+ size_t OldI = I;
1189+ size_t W = 1 ;
1190+ size_t Max = std::numeric_limits<size_t >::max ();
1191+ for (size_t K = Base; true ; K += Base) {
1192+ if (InputIdx == Input.size ())
1193+ return false ;
1194+ char C = Input[InputIdx++];
1195+ size_t Digit = 0 ;
1196+ if (!decodePunycodeDigit (C, Digit))
1197+ return false ;
1198+
1199+ if (Digit > (Max - I) / W)
1200+ return false ;
1201+ I += Digit * W;
1202+
1203+ size_t T;
1204+ if (K <= Bias)
1205+ T = TMin;
1206+ else if (K >= Bias + TMax)
1207+ T = TMax;
1208+ else
1209+ T = K - Bias;
1210+
1211+ if (Digit < T)
1212+ break ;
1213+
1214+ if (W > Max / (Base - T))
1215+ return false ;
1216+ W *= (Base - T);
1217+ }
1218+ size_t NumPoints = (Output.getCurrentPosition () - OutputSize) / 4 + 1 ;
1219+ Bias = Adapt (I - OldI, NumPoints);
1220+
1221+ if (I / NumPoints > Max - N)
1222+ return false ;
1223+ N += I / NumPoints;
1224+ I = I % NumPoints;
1225+
1226+ // Insert N at position I in the output.
1227+ char UTF8[4 ] = {};
1228+ if (!encodeUTF8 (N, UTF8))
1229+ return false ;
1230+ Output.insert (OutputSize + I * 4 , UTF8, 4 );
1231+ }
1232+
1233+ removeNullBytes (Output, OutputSize);
1234+ return true ;
1235+ }
1236+
1237+ void Demangler::printIdentifier (Identifier Ident) {
1238+ if (Error || !Print)
1239+ return ;
1240+
1241+ if (Ident.Punycode ) {
1242+ if (!decodePunycode (Ident.Name , Output))
1243+ Error = true ;
1244+ } else {
1245+ print (Ident.Name );
1246+ }
1247+ }
1248+
10811249char Demangler::look () const {
10821250 if (Error || Position >= Input.size ())
10831251 return 0 ;
0 commit comments