@@ -9,12 +9,18 @@ import (
99 "encoding/csv"
1010 "html"
1111 "io"
12+ "regexp"
13+ "strings"
1214
1315 "code.gitea.io/gitea/modules/markup"
16+ "code.gitea.io/gitea/modules/util"
1417)
1518
// quoteRegexp matches a single or double quote character, then the shortest
// run of one or more characters (newlines included), then another single or
// double quote. The closing quote does not have to be the same kind as the
// opening one. bestDelimiter uses it to strip quoted text from its sample.
19+ var quoteRegexp = regexp .MustCompile (`["'][\s\S]+?["']` )
20+
1621func init () {
1722 markup .RegisterParser (Parser {})
23+
1824}
1925
2026// Parser implements markup.Parser for CSV
@@ -28,12 +34,13 @@ func (Parser) Name() string {
2834
2935// Extensions implements markup.Parser
3036func (Parser ) Extensions () []string {
31- return []string {".csv" }
37+ return []string {".csv" , ".tsv" }
3238}
3339
3440// Render implements markup.Parser
35- func (Parser ) Render (rawBytes []byte , urlPrefix string , metas map [string ]string , isWiki bool ) []byte {
41+ func (p Parser ) Render (rawBytes []byte , urlPrefix string , metas map [string ]string , isWiki bool ) []byte {
3642 rd := csv .NewReader (bytes .NewReader (rawBytes ))
43+ rd .Comma = p .bestDelimiter (rawBytes )
3744 var tmpBlock bytes.Buffer
3845 tmpBlock .WriteString (`<table class="table">` )
3946 for {
@@ -50,9 +57,57 @@ func (Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]string,
5057 tmpBlock .WriteString (html .EscapeString (field ))
5158 tmpBlock .WriteString ("</td>" )
5259 }
53- tmpBlock .WriteString ("<tr>" )
60+ tmpBlock .WriteString ("</ tr>" )
5461 }
5562 tmpBlock .WriteString ("</table>" )
5663
5764 return tmpBlock .Bytes ()
5865}
66+
67+ // bestDelimiter scores the input CSV data against delimiters, and returns the best match.
68+ // Reads at most 10k bytes & 10 lines.
69+ func (p Parser ) bestDelimiter (data []byte ) rune {
70+ maxLines := 10
71+ maxBytes := util .Min (len (data ), 1e4 )
72+ text := string (data [:maxBytes ])
73+ text = quoteRegexp .ReplaceAllLiteralString (text , "" )
74+ lines := strings .SplitN (text , "\n " , maxLines + 1 )
75+ lines = lines [:util .Min (maxLines , len (lines ))]
76+
77+ delimiters := []rune {',' , ';' , '\t' , '|' }
78+ bestDelim := delimiters [0 ]
79+ bestScore := 0.0
80+ for _ , delim := range delimiters {
81+ score := p .scoreDelimiter (lines , delim )
82+ if score > bestScore {
83+ bestScore = score
84+ bestDelim = delim
85+ }
86+ }
87+
88+ return bestDelim
89+ }
90+
91+ // scoreDelimiter uses a count & regularity metric to evaluate a delimiter against lines of CSV
92+ func (Parser ) scoreDelimiter (lines []string , delim rune ) (score float64 ) {
93+ countTotal := 0
94+ countLineMax := 0
95+ linesNotEqual := 0
96+
97+ for _ , line := range lines {
98+ if len (line ) == 0 {
99+ continue
100+ }
101+
102+ countLine := strings .Count (line , string (delim ))
103+ countTotal += countLine
104+ if countLine != countLineMax {
105+ if countLineMax != 0 {
106+ linesNotEqual ++
107+ }
108+ countLineMax = util .Max (countLine , countLineMax )
109+ }
110+ }
111+
112+ return float64 (countTotal ) * (1 - float64 (linesNotEqual )/ float64 (len (lines )))
113+ }
0 commit comments