@@ -16,13 +16,13 @@ internal static class ExifEncodedStringHelpers
// Character-code identifiers expressed as 64-bit integers.
// UnicodeCode equals the bytes 0x55 0x4E 0x49 0x43 0x4F 0x44 0x45 0x00
// (ASCII "UNICODE" followed by one zero byte) read as a little-endian ulong;
// UndefinedCode is eight zero bytes.
1616 private const ulong UnicodeCode = 0x_45_44_4F_43_49_4E_55 ;
1717 private const ulong UndefinedCode = 0x_00_00_00_00_00_00_00_00 ;
1818
// Eight-byte identifier: ASCII "ASCII" (0x41 0x53 0x43 0x49 0x49) padded with three zero bytes.
// The "-" line is the earlier array-initializer form; the "+" line lists the same bytes as a collection expression.
19- private static ReadOnlySpan < byte > AsciiCodeBytes => new byte [ ] { 0x41 , 0x53 , 0x43 , 0x49 , 0x49 , 0 , 0 , 0 } ;
19+ private static ReadOnlySpan < byte > AsciiCodeBytes => [ 0x41 , 0x53 , 0x43 , 0x49 , 0x49 , 0 , 0 , 0 ] ;
2020
// Eight-byte identifier: ASCII "JIS" (0x4A 0x49 0x53) padded with five zero bytes.
// The "-" line is the earlier array-initializer form; the "+" line lists the same bytes as a collection expression.
21- private static ReadOnlySpan < byte > JISCodeBytes => new byte [ ] { 0x4A , 0x49 , 0x53 , 0 , 0 , 0 , 0 , 0 } ;
21+ private static ReadOnlySpan < byte > JISCodeBytes => [ 0x4A , 0x49 , 0x53 , 0 , 0 , 0 , 0 , 0 ] ;
2222
// Eight-byte identifier: ASCII "UNICODE" (0x55 0x4E 0x49 0x43 0x4F 0x44 0x45) padded with one zero byte.
// The "-" line is the earlier array-initializer form; the "+" line lists the same bytes as a collection expression.
23- private static ReadOnlySpan < byte > UnicodeCodeBytes => new byte [ ] { 0x55 , 0x4E , 0x49 , 0x43 , 0x4F , 0x44 , 0x45 , 0 } ;
23+ private static ReadOnlySpan < byte > UnicodeCodeBytes => [ 0x55 , 0x4E , 0x49 , 0x43 , 0x4F , 0x44 , 0x45 , 0 ] ;
2424
// Eight-byte identifier consisting solely of zero bytes.
// The "-" line is the earlier array-initializer form; the "+" line lists the same bytes as a collection expression.
25- private static ReadOnlySpan < byte > UndefinedCodeBytes => new byte [ ] { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 } ;
25+ private static ReadOnlySpan < byte > UndefinedCodeBytes => [ 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ] ;
2626
2727 // 20932 EUC-JP Japanese (JIS 0208-1990 and 0212-1990)
2828 // https://docs.microsoft.com/en-us/dotnet/api/system.text.encoding?view=net-6.0
@@ -50,37 +50,60 @@ private static Encoding JIS0208Encoding
5050 _ => UndefinedCodeBytes
5151 } ;
5252
// Maps a character code to the text Encoding used for it.
//   code  - the character code to look up.
//   order - byte order; consulted only for CharacterCode.Unicode (parameter added on the "+" line).
// Returns Encoding.ASCII for ASCII, JIS0208Encoding for JIS, a UTF-16 encoding for Unicode,
// and Encoding.UTF8 for Undefined as well as for any value not listed.
53- public static Encoding GetEncoding ( CharacterCode code ) => code switch
53+ public static Encoding GetEncoding ( CharacterCode code , ByteOrder order ) => code switch
5454 {
5555 CharacterCode . ASCII => Encoding . ASCII ,
5656 CharacterCode . JIS => JIS0208Encoding ,
// Previously always Encoding.Unicode; the "+" line picks Encoding.BigEndianUnicode
// when order is ByteOrder.BigEndian and Encoding.Unicode otherwise.
57- CharacterCode . Unicode => Encoding . Unicode ,
57+ CharacterCode . Unicode => order is ByteOrder . BigEndian ? Encoding . BigEndianUnicode : Encoding . Unicode ,
5858 CharacterCode . Undefined => Encoding . UTF8 ,
5959 _ => Encoding . UTF8
6060 } ;
6161
// Tries to decode an encoded string from buffer.
//   buffer        - the raw bytes; TryDetect reads the character code from it, and the text is
//                   everything after the first CharacterCodeBytesLength bytes.
//   order         - byte order passed to GetEncoding when no byte-order mark decides the encoding
//                   (parameter added on the "+" line).
//   encodedString - receives the detected code and decoded text on success, default otherwise.
// Returns true when TryDetect succeeds; false when it does not.
62- public static bool TryParse ( ReadOnlySpan < byte > buffer , out EncodedString encodedString )
62+ public static bool TryParse ( ReadOnlySpan < byte > buffer , ByteOrder order , out EncodedString encodedString )
6363 {
6464 if ( TryDetect ( buffer , out CharacterCode code ) )
6565 {
66- string text = GetEncoding ( code ) . GetString ( buffer [ CharacterCodeBytesLength ..] ) ;
67- encodedString = new EncodedString ( code , text ) ;
68- return true ;
// Text payload: the bytes that follow the character-code identifier.
66+ ReadOnlySpan < byte > textBuffer = buffer [ CharacterCodeBytesLength ..] ;
// A byte-order mark is only looked for in Unicode text that has at least two bytes.
67+ if ( code == CharacterCode . Unicode && textBuffer . Length >= 2 )
68+ {
69+ // Check BOM
70+ if ( textBuffer . StartsWith ( ( ReadOnlySpan < byte > ) [ 0xFF , 0xFE ] ) )
71+ {
72+ // Little-endian BOM
// The two mark bytes are skipped; the rest is decoded with Encoding.Unicode regardless of order.
73+ string text = Encoding . Unicode . GetString ( textBuffer [ 2 ..] ) ;
74+ encodedString = new EncodedString ( code , text ) ;
75+ return true ;
76+ }
77+
78+ if ( textBuffer . StartsWith ( ( ReadOnlySpan < byte > ) [ 0xFE , 0xFF ] ) )
79+ {
80+ // Big-endian BOM
// The two mark bytes are skipped; the rest is decoded with Encoding.BigEndianUnicode regardless of order.
81+ string text = Encoding . BigEndianUnicode . GetString ( textBuffer [ 2 ..] ) ;
82+ encodedString = new EncodedString ( code , text ) ;
83+ return true ;
84+ }
85+ }
86+
// No mark matched (or the code is not Unicode): decode the whole payload with the encoding
// chosen by GetEncoding ( code , order ). The bare braces give this `text` local its own scope,
// separate from the `text` locals declared in the nested blocks above.
87+ {
88+ string text = GetEncoding ( code , order ) . GetString ( textBuffer ) ;
89+ encodedString = new EncodedString ( code , text ) ;
90+ return true ;
91+ }
6992 }
7093
// TryDetect did not recognise a character code.
7194 encodedString = default ;
7295 return false ;
7396 }
7497
// Returns the byte count of encodedString.Text in the encoding selected for encodedString.Code,
// plus CharacterCodeBytesLength for the identifier.
// The "+" line always asks GetEncoding for ByteOrder.LittleEndian, so Unicode text is measured
// with Encoding.Unicode.
75098 public static uint GetDataLength ( EncodedString encodedString ) =>
76- ( uint ) GetEncoding ( encodedString . Code ) . GetByteCount ( encodedString . Text ) + CharacterCodeBytesLength ;
99+ ( uint ) GetEncoding ( encodedString . Code , ByteOrder . LittleEndian ) . GetByteCount ( encodedString . Text ) + CharacterCodeBytesLength ;
77100
// Writes encodedString into destination: first the identifier bytes returned by GetCodeBytes,
// then the text, encoded by the Write ( Encoding , ... ) overload into the span that starts
// CharacterCodeBytesLength bytes in.
// Returns CharacterCodeBytesLength plus the count reported by that overload.
78101 public static int Write ( EncodedString encodedString , Span < byte > destination )
79102 {
80103 GetCodeBytes ( encodedString . Code ) . CopyTo ( destination ) ;
81104
82105 string text = encodedString . Text ;
// The "+" line always requests ByteOrder.LittleEndian, so Unicode text is encoded with Encoding.Unicode.
// NOTE(review): TryParse decodes Unicode text without a byte-order mark using the caller's byte order;
// whether the Write overload called here emits a mark is not visible in this view - confirm that
// big-endian readers round-trip correctly.
83- int count = Write ( GetEncoding ( encodedString . Code ) , text , destination [ CharacterCodeBytesLength ..] ) ;
106+ int count = Write ( GetEncoding ( encodedString . Code , ByteOrder . LittleEndian ) , text , destination [ CharacterCodeBytesLength ..] ) ;
84107
85108 return CharacterCodeBytesLength + count ;
86109 }
@@ -92,8 +115,7 @@ private static bool TryDetect(ReadOnlySpan<byte> buffer, out CharacterCode code)
92115 {
93116 if ( buffer . Length >= CharacterCodeBytesLength )
94117 {
95- ulong test = BinaryPrimitives . ReadUInt64LittleEndian ( buffer ) ;
96- switch ( test )
118+ switch ( BinaryPrimitives . ReadUInt64LittleEndian ( buffer ) )
97119 {
98120 case AsciiCode :
99121 code = CharacterCode . ASCII ;
@@ -108,7 +130,8 @@ private static bool TryDetect(ReadOnlySpan<byte> buffer, out CharacterCode code)
108130 code = CharacterCode . Undefined ;
109131 return true ;
110132 default :
111- break ;
133+ code = default ;
134+ return false ;
112135 }
113136 }
114137
0 commit comments