Skip to content

Commit dbcc041

Browse files
committed
Add coerce and output format options for numeric fields
1 parent dcbe67a commit dbcc041

File tree

16 files changed

+315
-75
lines changed

16 files changed

+315
-75
lines changed

docs/configuration/index-config.md

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ Indexing with position is required to run phrase queries.
164164

165165
Quickwit handles three numeric types: `i64`, `u64`, and `f64`.
166166

167-
Numeric values can be stored in a fast field (the equivalent of Lucene's `DocValues`) which is a column-oriented storage.
167+
Numeric values can be stored in a fast field (the equivalent of Lucene's `DocValues`), which is column-oriented storage used for range queries and aggregations.
168168

169169
Example of a mapping for a u64 field:
170170

@@ -179,12 +179,14 @@ fast: true
179179

180180
**Parameters for i64, u64, and f64 fields**
181181

182-
| Variable | Description | Default value |
183-
| ------------- | ------------- | ------------- |
184-
| `description` | Optional description for the field. | `None` |
185-
| `stored` | Whether the field values are stored in the document store | `true` |
186-
| `indexed` | Whether the field values are indexed | `true` |
187-
| `fast` | Whether the field values are stored in a fast field | `false` |
182+
| Variable | Description | Default value |
183+
| --------------- | ------------- | ------------- |
184+
| `description` | Optional description for the field. | `None` |
185+
| `stored` | Whether the field values are stored in the document store. | `true` |
186+
| `indexed` | Whether the field values are indexed. | `true` |
187+
| `fast` | Whether the field values are stored in a fast field. | `false` |
188+
| `coerce` | Whether to convert numbers passed as strings to integers or floats. | `true` |
189+
| `output_format` | JSON type used to return numbers in search results. Possible values are `number` or `string`. | `number` |
188190

189191
#### `datetime` type
190192

quickwit/quickwit-doc-mapper/Cargo.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,12 @@ quickwit-query = { workspace = true }
3838
criterion = { workspace = true }
3939
matches = { workspace = true }
4040
proptest = { workspace = true }
41-
quickwit-proto = { workspace = true }
42-
quickwit-query = { workspace = true, features = ["testsuite"] }
4341
serde_yaml = { workspace = true }
4442
time = { workspace = true }
4543

44+
quickwit-proto = { workspace = true }
45+
quickwit-query = { workspace = true, features = ["testsuite"] }
46+
4647
[features]
4748
multilang = ["quickwit-query/multilang"]
4849
testsuite = ["multilang"]

quickwit/quickwit-doc-mapper/src/default_doc_mapper/field_mapping_entry.rs

Lines changed: 67 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -95,9 +95,40 @@ pub struct QuickwitNumericOptions {
9595
pub indexed: bool,
9696
#[serde(default)]
9797
pub fast: bool,
98+
#[serde(default = "default_as_true")]
99+
pub coerce: bool,
100+
#[serde(default)]
101+
pub output_format: NumericOutputFormat,
98102
}
99103

100104
impl Default for QuickwitNumericOptions {
105+
fn default() -> Self {
106+
Self {
107+
description: None,
108+
indexed: true,
109+
stored: true,
110+
fast: false,
111+
coerce: true,
112+
output_format: NumericOutputFormat::default(),
113+
}
114+
}
115+
}
116+
117+
#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, utoipa::ToSchema)]
118+
#[serde(deny_unknown_fields)]
119+
pub struct QuickwitBoolOptions {
120+
#[serde(default)]
121+
#[serde(skip_serializing_if = "Option::is_none")]
122+
pub description: Option<String>,
123+
#[serde(default = "default_as_true")]
124+
pub stored: bool,
125+
#[serde(default = "default_as_true")]
126+
pub indexed: bool,
127+
#[serde(default)]
128+
pub fast: bool,
129+
}
130+
131+
impl Default for QuickwitBoolOptions {
101132
fn default() -> Self {
102133
Self {
103134
description: None,
@@ -150,15 +181,15 @@ pub enum BinaryFormat {
150181
impl BinaryFormat {
151182
pub fn as_str(&self) -> &str {
152183
match self {
153-
BinaryFormat::Base64 => "base64",
154-
BinaryFormat::Hex => "hex",
184+
Self::Base64 => "base64",
185+
Self::Hex => "hex",
155186
}
156187
}
157188

158189
pub fn format_to_json(&self, value: &[u8]) -> JsonValue {
159190
match self {
160-
BinaryFormat::Base64 => BASE64_STANDARD.encode(value).into(),
161-
BinaryFormat::Hex => hex::encode(value).into(),
191+
Self::Base64 => BASE64_STANDARD.encode(value).into(),
192+
Self::Hex => hex::encode(value).into(),
162193
}
163194
}
164195

@@ -172,21 +203,27 @@ impl BinaryFormat {
172203
));
173204
};
174205
let payload = match self {
175-
BinaryFormat::Base64 => {
176-
BASE64_STANDARD
177-
.decode(&byte_str)
178-
.map_err(|base64_decode_err| {
179-
format!("Expected base64 string, got `{byte_str}`: {base64_decode_err}")
180-
})?
181-
}
182-
BinaryFormat::Hex => hex::decode(&byte_str).map_err(|hex_decode_err| {
206+
Self::Base64 => BASE64_STANDARD
207+
.decode(&byte_str)
208+
.map_err(|base64_decode_err| {
209+
format!("Expected base64 string, got `{byte_str}`: {base64_decode_err}")
210+
})?,
211+
Self::Hex => hex::decode(&byte_str).map_err(|hex_decode_err| {
183212
format!("Expected hex string, got `{byte_str}`: {hex_decode_err}")
184213
})?,
185214
};
186215
Ok(TantivyValue::Bytes(payload))
187216
}
188217
}
189218

219+
#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash, Default, Serialize, Deserialize)]
220+
#[serde(rename_all = "snake_case")]
221+
pub enum NumericOutputFormat {
222+
#[default]
223+
Number,
224+
String,
225+
}
226+
190227
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, utoipa::ToSchema)]
191228
#[serde(deny_unknown_fields)]
192229
pub struct QuickwitIpAddrOptions {
@@ -618,8 +655,8 @@ fn deserialize_mapping_type(
618655
Ok(FieldMappingType::F64(numeric_options, cardinality))
619656
}
620657
Type::Bool => {
621-
let numeric_options: QuickwitNumericOptions = serde_json::from_value(json)?;
622-
Ok(FieldMappingType::Bool(numeric_options, cardinality))
658+
let bool_options: QuickwitBoolOptions = serde_json::from_value(json)?;
659+
Ok(FieldMappingType::Bool(bool_options, cardinality))
623660
}
624661
Type::IpAddr => {
625662
let ip_addr_options: QuickwitIpAddrOptions = serde_json::from_value(json)?;
@@ -685,8 +722,8 @@ fn typed_mapping_to_json_params(
685722
FieldMappingType::Text(text_options, _) => serialize_to_map(&text_options),
686723
FieldMappingType::U64(options, _)
687724
| FieldMappingType::I64(options, _)
688-
| FieldMappingType::F64(options, _)
689-
| FieldMappingType::Bool(options, _) => serialize_to_map(&options),
725+
| FieldMappingType::F64(options, _) => serialize_to_map(&options),
726+
FieldMappingType::Bool(options, _) => serialize_to_map(&options),
690727
FieldMappingType::Bytes(options, _) => serialize_to_map(&options),
691728
FieldMappingType::IpAddr(options, _) => serialize_to_map(&options),
692729
FieldMappingType::DateTime(date_time_options, _) => serialize_to_map(&date_time_options),
@@ -1062,20 +1099,21 @@ mod tests {
10621099

10631100
#[test]
10641101
fn test_deserialize_i64_parsing_error_with_text_options() {
1065-
let result = serde_json::from_str::<FieldMappingEntry>(
1102+
let error = serde_json::from_str::<FieldMappingEntry>(
10661103
r#"
10671104
{
10681105
"name": "my_field_name",
10691106
"type": "i64",
10701107
"tokenizer": "basic"
10711108
}
10721109
"#,
1073-
);
1074-
let error = result.unwrap_err();
1110+
)
1111+
.unwrap_err();
1112+
10751113
assert_eq!(
10761114
error.to_string(),
10771115
"Error while parsing field `my_field_name`: unknown field `tokenizer`, expected one \
1078-
of `description`, `stored`, `indexed`, `fast`"
1116+
of `description`, `stored`, `indexed`, `fast`, `coerce`, `output_format`"
10791117
);
10801118
}
10811119

@@ -1146,6 +1184,8 @@ mod tests {
11461184
"stored": true,
11471185
"fast": false,
11481186
"indexed": true,
1187+
"coerce": true,
1188+
"output_format": "number"
11491189
})
11501190
);
11511191
Ok(())
@@ -1165,7 +1205,7 @@ mod tests {
11651205
.unwrap_err()
11661206
.to_string(),
11671207
"Error while parsing field `my_field_name`: unknown field `tokenizer`, expected one \
1168-
of `description`, `stored`, `indexed`, `fast`"
1208+
of `description`, `stored`, `indexed`, `fast`, `coerce`, `output_format`"
11691209
);
11701210
}
11711211

@@ -1232,6 +1272,8 @@ mod tests {
12321272
"stored": true,
12331273
"fast": false,
12341274
"indexed": true,
1275+
"coerce": true,
1276+
"output_format": "number"
12351277
})
12361278
);
12371279
}
@@ -1256,6 +1298,8 @@ mod tests {
12561298
"stored": true,
12571299
"fast": false,
12581300
"indexed": true,
1301+
"coerce": true,
1302+
"output_format": "number"
12591303
})
12601304
);
12611305
}
@@ -1616,6 +1660,8 @@ mod tests {
16161660
"stored": true,
16171661
"fast": false,
16181662
"indexed": true,
1663+
"coerce": true,
1664+
"output_format": "number"
16191665
})
16201666
);
16211667
}

quickwit/quickwit-doc-mapper/src/default_doc_mapper/field_mapping_type.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
use tantivy::schema::Type;
2121

2222
use super::date_time_type::QuickwitDateTimeOptions;
23+
use super::field_mapping_entry::QuickwitBoolOptions;
2324
use crate::default_doc_mapper::field_mapping_entry::{
2425
QuickwitBytesOptions, QuickwitIpAddrOptions, QuickwitJsonOptions, QuickwitNumericOptions,
2526
QuickwitObjectOptions, QuickwitTextOptions,
@@ -41,7 +42,7 @@ pub(crate) enum FieldMappingType {
4142
/// 64-bit float mapping type configuration.
4243
F64(QuickwitNumericOptions, Cardinality),
4344
/// Bool mapping type configuration.
44-
Bool(QuickwitNumericOptions, Cardinality),
45+
Bool(QuickwitBoolOptions, Cardinality),
4546
/// IP Address mapping type configuration.
4647
IpAddr(QuickwitIpAddrOptions, Cardinality),
4748
/// Bytes mapping type configuration.

0 commit comments

Comments
 (0)