Skip to content

Commit 3da5e6e

Browse files
authored
Merge c33024b into 8231669
2 parents 8231669 + c33024b commit 3da5e6e

File tree

9 files changed

+1149
-1132
lines changed

9 files changed

+1149
-1132
lines changed

domain/model/column_inference.go

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
// Package model provides the domain model for filesql.
2+
package model
3+
4+
import (
5+
"regexp"
6+
"strconv"
7+
"strings"
8+
"time"
9+
)
10+
11+
// datetimePatterns is the lookup table used by isDatetime. Every entry pairs
// a regular expression describing the overall shape of a value with the
// time layouts that are attempted, in order, once that shape matches. The
// expressions are compiled once, when the package variable is initialized.
var datetimePatterns = []struct {
	pattern *regexp.Regexp // shape the value must have
	formats []string       // candidate layouts for values of that shape
}{
	// ISO 8601 timestamp carrying "Z" or a numeric UTC offset.
	{
		pattern: regexp.MustCompile(`^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:\d{2})$`),
		formats: []string{time.RFC3339, time.RFC3339Nano},
	},
	// ISO 8601 timestamp with no timezone information.
	{
		pattern: regexp.MustCompile(`^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?$`),
		formats: []string{"2006-01-02T15:04:05", "2006-01-02T15:04:05.000"},
	},
	// ISO 8601 date and time separated by a single space instead of "T".
	{
		pattern: regexp.MustCompile(`^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}(\.\d+)?$`),
		formats: []string{"2006-01-02 15:04:05", "2006-01-02 15:04:05.000"},
	},
	// ISO 8601 calendar date.
	{
		pattern: regexp.MustCompile(`^\d{4}-\d{2}-\d{2}$`),
		formats: []string{"2006-01-02"},
	},
	// US style month/day/year with a time, optionally followed by AM or PM.
	{
		pattern: regexp.MustCompile(`^\d{1,2}/\d{1,2}/\d{4} \d{1,2}:\d{2}:\d{2}( (AM|PM))?$`),
		formats: []string{"1/2/2006 15:04:05", "1/2/2006 3:04:05 PM", "01/02/2006 15:04:05"},
	},
	// US style month/day/year, date only.
	{
		pattern: regexp.MustCompile(`^\d{1,2}/\d{1,2}/\d{4}$`),
		formats: []string{"1/2/2006", "01/02/2006"},
	},
	// European style day.month.year with a time.
	{
		pattern: regexp.MustCompile(`^\d{1,2}\.\d{1,2}\.\d{4} \d{1,2}:\d{2}:\d{2}$`),
		formats: []string{"2.1.2006 15:04:05", "02.01.2006 15:04:05"},
	},
	// European style day.month.year, date only.
	{
		pattern: regexp.MustCompile(`^\d{1,2}\.\d{1,2}\.\d{4}$`),
		formats: []string{"2.1.2006", "02.01.2006"},
	},
	// Time of day with seconds and optional fractional seconds.
	{
		pattern: regexp.MustCompile(`^\d{1,2}:\d{2}:\d{2}(\.\d+)?$`),
		formats: []string{"15:04:05", "15:04:05.000", "3:04:05"},
	},
	// Time of day, hours and minutes only.
	{
		pattern: regexp.MustCompile(`^\d{1,2}:\d{2}$`),
		formats: []string{"15:04", "3:04"},
	},
}
64+
65+
// isDatetime checks if a string value represents a datetime
66+
func isDatetime(value string) bool {
67+
value = strings.TrimSpace(value)
68+
if value == "" {
69+
return false
70+
}
71+
72+
for _, dp := range datetimePatterns {
73+
if dp.pattern.MatchString(value) {
74+
// Try each format for this pattern
75+
for _, format := range dp.formats {
76+
if _, err := time.Parse(format, value); err == nil {
77+
return true
78+
}
79+
}
80+
}
81+
}
82+
83+
return false
84+
}
85+
86+
// InferColumnType infers the SQL column type from a slice of string values
87+
func InferColumnType(values []string) ColumnType {
88+
if len(values) == 0 {
89+
return ColumnTypeText
90+
}
91+
92+
hasDatetime := false
93+
hasReal := false
94+
hasInteger := false
95+
hasText := false
96+
97+
for _, value := range values {
98+
// Skip empty values for type inference
99+
value = strings.TrimSpace(value)
100+
if value == "" {
101+
continue
102+
}
103+
104+
// Check if it's a datetime first (before checking numbers)
105+
if isDatetime(value) {
106+
hasDatetime = true
107+
continue
108+
}
109+
110+
// Try to parse as integer
111+
if _, err := strconv.ParseInt(value, 10, 64); err == nil {
112+
hasInteger = true
113+
continue
114+
}
115+
116+
// Try to parse as float
117+
if _, err := strconv.ParseFloat(value, 64); err == nil {
118+
hasReal = true
119+
continue
120+
}
121+
122+
// If it's not a number or datetime, it's text
123+
hasText = true
124+
break // If any value is text, the whole column is text
125+
}
126+
127+
// Determine the most appropriate type
128+
// Priority: TEXT > DATETIME > REAL > INTEGER
129+
if hasText {
130+
return ColumnTypeText
131+
}
132+
if hasDatetime {
133+
return ColumnTypeDatetime
134+
}
135+
if hasReal {
136+
return ColumnTypeReal
137+
}
138+
if hasInteger {
139+
return ColumnTypeInteger
140+
}
141+
142+
// Default to TEXT if no values were found
143+
return ColumnTypeText
144+
}
145+
146+
// InferColumnsInfo infers column information from header and data records
147+
func InferColumnsInfo(header Header, records []Record) []ColumnInfo {
148+
columnCount := len(header)
149+
if columnCount == 0 {
150+
return nil
151+
}
152+
153+
columns := make([]ColumnInfo, columnCount)
154+
155+
// Initialize column info with headers
156+
for i, name := range header {
157+
columns[i] = ColumnInfo{
158+
Name: name,
159+
Type: ColumnTypeText, // Default to TEXT
160+
}
161+
}
162+
163+
// If no records, return with TEXT types
164+
if len(records) == 0 {
165+
return columns
166+
}
167+
168+
// Collect values for each column
169+
for i := range columnCount {
170+
var values []string
171+
for _, record := range records {
172+
if i < len(record) {
173+
values = append(values, record[i])
174+
}
175+
}
176+
177+
// Infer type from values
178+
columns[i].Type = InferColumnType(values)
179+
}
180+
181+
return columns
182+
}

0 commit comments

Comments
 (0)