@@ -6,18 +6,14 @@ package typesniffer
66import (
77 "bytes"
88 "encoding/binary"
9- "fmt"
10- "io"
119 "net/http"
1210 "regexp"
1311 "slices"
1412 "strings"
15-
16- "code.gitea.io/gitea/modules/util"
13+ "sync"
1714)
1815
19- // Use at most this many bytes to determine Content Type.
20- const sniffLen = 1024
// SniffContentSize is the byte limit used when sniffing content: DetectContentType
// truncates longer input to this many bytes before running its extra (non-net/http) checks.
16+ const SniffContentSize = 1024
2117
2218const (
2319 MimeTypeImageSvg = "image/svg+xml"
@@ -26,22 +22,30 @@ const (
2622 MimeTypeApplicationOctetStream = "application/octet-stream"
2723)
2824
29- var (
30- svgComment = regexp .MustCompile (`(?s)<!--.*?-->` )
31- svgTagRegex = regexp .MustCompile (`(?si)\A\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b` )
32- svgTagInXMLRegex = regexp .MustCompile (`(?si)\A<\?xml\b.*?\?>\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b` )
33- )
34-
35- // SniffedType contains information about a blobs type.
// globalVars returns the compiled regular expressions used for SVG sniffing.
// It is wrapped in sync.OnceValue, so the patterns are compiled on the first
// call only and every later call returns the same struct value.
25+ var globalVars = sync .OnceValue (func () (ret struct {
26+ svgComment , svgTagRegex , svgTagInXMLRegex * regexp.Regexp
27+ },
28+ ) {
// svgComment matches an XML/HTML comment; (?s) lets "." span newlines.
29+ ret .svgComment = regexp .MustCompile (`(?s)<!--.*?-->` )
// svgTagRegex matches, anchored at the start of the input and case-insensitively,
// optional whitespace and any number of "<!DOCTYPE svg ...>" declarations followed by an "<svg" tag.
30+ ret .svgTagRegex = regexp .MustCompile (`(?si)\A\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b` )
// svgTagInXMLRegex is the XML variant: the input must begin with a "<?xml ... ?>" declaration,
// followed by the same optional DOCTYPE declarations and an "<svg" tag.
31+ ret .svgTagInXMLRegex = regexp .MustCompile (`(?si)\A<\?xml\b.*?\?>\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b` )
32+ return ret
33+ })
34+
35+ // SniffedType contains information about a blob's type; it wraps the detected content-type string.
3636type SniffedType struct {
3737 contentType string // content type as set by DetectContentType, e.g. the result of http.DetectContentType
3838}
3939
40- // IsText etects if content format is plain text.
40+ // IsText detects if the content format is in the text family (the content type contains "text/"),
// including text/plain, text/html, text/css, etc.
4141func (ct SniffedType ) IsText () bool {
4242 return strings .Contains (ct .contentType , "text/" )
4343}
4444
// IsTextPlain detects if the content format is plain text, i.e. the content type contains "text/plain".
// Unlike IsText, other text/* types such as text/html do not match.
45+ func (ct SniffedType ) IsTextPlain () bool {
46+ return strings .Contains (ct .contentType , "text/plain" )
47+ }
48+
4549// IsImage detects if data is an image format
4650func (ct SniffedType ) IsImage () bool {
4751 return strings .Contains (ct .contentType , "image/" )
@@ -57,12 +61,12 @@ func (ct SniffedType) IsPDF() bool {
5761 return strings .Contains (ct .contentType , "application/pdf" )
5862}
5963
60- // IsVideo detects if data is an video format
64+ // IsVideo detects if data is a video format, i.e. the content type contains "video/".
6165func (ct SniffedType ) IsVideo () bool {
6266 return strings .Contains (ct .contentType , "video/" )
6367}
6468
65- // IsAudio detects if data is an video format
69+ // IsAudio detects if data is an audio format, i.e. the content type contains "audio/".
6670func (ct SniffedType ) IsAudio () bool {
6771 return strings .Contains (ct .contentType , "audio/" )
6872}
@@ -103,33 +107,34 @@ func detectFileTypeBox(data []byte) (brands []string, found bool) {
103107 return brands , true
104108}
105109
106- // DetectContentType extends http.DetectContentType with more content types. Defaults to text/unknown if input is empty.
110+ // DetectContentType extends http.DetectContentType with more content types. Defaults to text/plain if input is empty.
107111func DetectContentType (data []byte ) SniffedType {
108112 if len (data ) == 0 {
109- return SniffedType {"text/unknown " }
113+ return SniffedType {"text/plain " }
110114 }
111115
112116 ct := http .DetectContentType (data )
113117
114- if len (data ) > sniffLen {
115- data = data [:sniffLen ]
118+ if len (data ) > SniffContentSize {
119+ data = data [:SniffContentSize ]
116120 }
117121
122+ vars := globalVars ()
118123 // SVG is unsupported by http.DetectContentType, https://github.com/golang/go/issues/15888
119124 detectByHTML := strings .Contains (ct , "text/plain" ) || strings .Contains (ct , "text/html" )
120125 detectByXML := strings .Contains (ct , "text/xml" )
121126 if detectByHTML || detectByXML {
122- dataProcessed := svgComment .ReplaceAll (data , nil )
127+ dataProcessed := vars . svgComment .ReplaceAll (data , nil )
123128 dataProcessed = bytes .TrimSpace (dataProcessed )
124- if detectByHTML && svgTagRegex .Match (dataProcessed ) ||
125- detectByXML && svgTagInXMLRegex .Match (dataProcessed ) {
129+ if detectByHTML && vars . svgTagRegex .Match (dataProcessed ) ||
130+ detectByXML && vars . svgTagInXMLRegex .Match (dataProcessed ) {
126131 ct = MimeTypeImageSvg
127132 }
128133 }
129134
130135 if strings .HasPrefix (ct , "audio/" ) && bytes .HasPrefix (data , []byte ("ID3" )) {
131136 // The MP3 detection is quite inaccurate, any content with "ID3" prefix will result in "audio/mpeg".
132- // So remove the "ID3" prefix and detect again, if result is text, then it must be text content.
137+ // So remove the "ID3" prefix and detect again, then if the result is "text", it must be text content.
133138 // This works especially because audio files contain many unprintable/invalid characters like `0x00`
134139 ct2 := http .DetectContentType (data [3 :])
135140 if strings .HasPrefix (ct2 , "text/" ) {
@@ -155,15 +160,3 @@ func DetectContentType(data []byte) SniffedType {
155160 }
156161 return SniffedType {ct }
157162}
158-
159- // DetectContentTypeFromReader guesses the content type contained in the reader.
160- func DetectContentTypeFromReader (r io.Reader ) (SniffedType , error ) {
161- buf := make ([]byte , sniffLen )
162- n , err := util .ReadAtMost (r , buf )
163- if err != nil {
164- return SniffedType {}, fmt .Errorf ("DetectContentTypeFromReader io error: %w" , err )
165- }
166- buf = buf [:n ]
167-
168- return DetectContentType (buf ), nil
169- }
0 commit comments