Skip to content

Commit 90db1d1

Browse files
authored
fix: Support passing batch source to streaming sources for backfills (#2523)
* fix: Support passing batch source to streaming sources for backfills — Signed-off-by: Achal Shah <[email protected]>
* fix tests — Signed-off-by: Achal Shah <[email protected]>
* fix tests — Signed-off-by: Achal Shah <[email protected]>
1 parent c22fa2c commit 90db1d1

File tree

4 files changed

+25
-12
lines changed

4 files changed

+25
-12
lines changed

go/embedded/online_features.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,13 +63,13 @@ func (s *OnlineFeatureService) GetEntityTypesMap(featureRefs []string) (map[stri
6363

6464
joinKeyTypes := make(map[string]int32)
6565

66-
for viewName, _ := range viewNames {
66+
for viewName := range viewNames {
6767
view, err := s.fs.GetFeatureView(viewName, true)
6868
if err != nil {
6969
// skip on demand feature views
7070
continue
7171
}
72-
for entityName, _ := range view.Entities {
72+
for entityName := range view.Entities {
7373
entity := entitiesByName[entityName]
7474
joinKeyTypes[entity.JoinKey] = int32(entity.ValueType.Number())
7575
}
@@ -98,7 +98,7 @@ func (s *OnlineFeatureService) GetEntityTypesMapByFeatureService(featureServiceN
9898
// skip on demand feature views
9999
continue
100100
}
101-
for entityName, _ := range view.Entities {
101+
for entityName := range view.Entities {
102102
entity := entitiesByName[entityName]
103103
joinKeyTypes[entity.JoinKey] = int32(entity.ValueType.Number())
104104
}

protos/feast/core/DataSource.proto

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ import "feast/core/DataFormat.proto";
2626
import "feast/types/Value.proto";
2727

2828
// Defines a Data Source that can be used to source Feature data
29-
// Next available id: 26
29+
// Next available id: 27
3030
message DataSource {
3131
// Field indexes should *not* be reused. Not sure if fields 6-10 were used previously or not,
3232
// but they are going to be reserved for backwards compatibility.
@@ -82,6 +82,10 @@ message DataSource {
8282
// first party sources as well.
8383
string data_source_class_type = 17;
8484

85+
// Optional batch source for streaming sources for historical features and materialization.
86+
DataSource batch_source = 26;
87+
88+
8589
// Defines options for DataSource that sources features from a file
8690
message FileOptions {
8791
FileFormat file_format = 1;
@@ -128,6 +132,7 @@ message DataSource {
128132

129133
// Defines the stream data format encoding feature/entity data in Kafka messages.
130134
StreamFormat message_format = 3;
135+
131136
}
132137

133138
// Defines options for DataSource that sources features from Kinesis records.
@@ -199,8 +204,6 @@ message DataSource {
199204
message PushOptions {
200205
// Mapping of feature name to type
201206
map<string, feast.types.ValueType.Enum> schema = 1;
202-
// Optional batch source for the push source for historical features and materialization.
203-
DataSource batch_source = 2;
204207
}
205208

206209

sdk/python/feast/data_source.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,7 @@ def __init__(
360360
tags: Optional[Dict[str, str]] = None,
361361
owner: Optional[str] = "",
362362
timestamp_field: Optional[str] = "",
363+
batch_source: Optional[DataSource] = None,
363364
):
364365
super().__init__(
365366
event_timestamp_column=event_timestamp_column,
@@ -372,6 +373,7 @@ def __init__(
372373
name=name,
373374
timestamp_field=timestamp_field,
374375
)
376+
self.batch_source = batch_source
375377
self.kafka_options = KafkaOptions(
376378
bootstrap_servers=bootstrap_servers,
377379
message_format=message_format,
@@ -411,6 +413,7 @@ def from_proto(data_source: DataSourceProto):
411413
description=data_source.description,
412414
tags=dict(data_source.tags),
413415
owner=data_source.owner,
416+
batch_source=DataSource.from_proto(data_source.batch_source),
414417
)
415418

416419
def to_proto(self) -> DataSourceProto:
@@ -427,6 +430,8 @@ def to_proto(self) -> DataSourceProto:
427430
data_source_proto.timestamp_field = self.timestamp_field
428431
data_source_proto.created_timestamp_column = self.created_timestamp_column
429432
data_source_proto.date_partition_column = self.date_partition_column
433+
if self.batch_source:
434+
data_source_proto.batch_source.MergeFrom(self.batch_source.to_proto())
430435
return data_source_proto
431436

432437
@staticmethod
@@ -546,6 +551,7 @@ def from_proto(data_source: DataSourceProto):
546551
description=data_source.description,
547552
tags=dict(data_source.tags),
548553
owner=data_source.owner,
554+
batch_source=DataSource.from_proto(data_source.batch_source),
549555
)
550556

551557
@staticmethod
@@ -569,6 +575,7 @@ def __init__(
569575
tags: Optional[Dict[str, str]] = None,
570576
owner: Optional[str] = "",
571577
timestamp_field: Optional[str] = "",
578+
batch_source: Optional[DataSource] = None,
572579
):
573580
super().__init__(
574581
name=name,
@@ -581,6 +588,7 @@ def __init__(
581588
owner=owner,
582589
timestamp_field=timestamp_field,
583590
)
591+
self.batch_source = batch_source
584592
self.kinesis_options = KinesisOptions(
585593
record_format=record_format, region=region, stream_name=stream_name
586594
)
@@ -618,6 +626,8 @@ def to_proto(self) -> DataSourceProto:
618626
data_source_proto.timestamp_field = self.timestamp_field
619627
data_source_proto.created_timestamp_column = self.created_timestamp_column
620628
data_source_proto.date_partition_column = self.date_partition_column
629+
if self.batch_source:
630+
data_source_proto.batch_source.MergeFrom(self.batch_source.to_proto())
621631

622632
return data_source_proto
623633

@@ -634,6 +644,7 @@ class PushSource(DataSource):
634644

635645
def __init__(
636646
self,
647+
*,
637648
name: str,
638649
schema: Dict[str, ValueType],
639650
batch_source: DataSource,
@@ -693,8 +704,8 @@ def from_proto(data_source: DataSourceProto):
693704
for key, val in schema_pb.items():
694705
schema[key] = ValueType(val)
695706

696-
assert data_source.push_options.HasField("batch_source")
697-
batch_source = DataSource.from_proto(data_source.push_options.batch_source)
707+
assert data_source.HasField("batch_source")
708+
batch_source = DataSource.from_proto(data_source.batch_source)
698709

699710
return PushSource(
700711
name=data_source.name,
@@ -714,9 +725,7 @@ def to_proto(self) -> DataSourceProto:
714725
if self.batch_source:
715726
batch_source_proto = self.batch_source.to_proto()
716727

717-
options = DataSourceProto.PushOptions(
718-
schema=schema_pb, batch_source=batch_source_proto
719-
)
728+
options = DataSourceProto.PushOptions(schema=schema_pb,)
720729
data_source_proto = DataSourceProto(
721730
name=self.name,
722731
type=DataSourceProto.PUSH_SOURCE,
@@ -725,6 +734,7 @@ def to_proto(self) -> DataSourceProto:
725734
description=self.description,
726735
tags=self.tags,
727736
owner=self.owner,
737+
batch_source=batch_source_proto,
728738
)
729739

730740
return data_source_proto

sdk/python/tests/unit/test_data_sources.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ def test_push_with_batch():
1212
batch_source=BigQuerySource(table="test.test"),
1313
)
1414
push_source_proto = push_source.to_proto()
15+
assert push_source_proto.HasField("batch_source")
1516
assert push_source_proto.push_options is not None
16-
assert push_source_proto.push_options.HasField("batch_source")
1717

1818
push_source_unproto = PushSource.from_proto(push_source_proto)
1919

0 commit comments

Comments
 (0)