Better handling on Kafka #1417
Changes from 20 commits
New file (package kafkalib):
@@ -0,0 +1,22 @@
package kafkalib

import "fmt"

type FetchMessageError struct {
	Err error
}

func NewFetchMessageError(err error) FetchMessageError {
	return FetchMessageError{
		Err: err,
	}
}

func (e FetchMessageError) Error() string {
	return fmt.Sprintf("failed to fetch message: %v", e.Err)
}

func IsFetchMessageError(err error) bool {
	_, ok := err.(FetchMessageError)
	return ok
}
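For context, here is a minimal standalone sketch of how the new error type can be consumed. The fetch function is a hypothetical stand-in for a consumer read, and FetchMessageError is redeclared only so the snippet compiles on its own. Note that because the type does not implement Unwrap, errors.Is will not see the wrapped error; callers are expected to rely on IsFetchMessageError (a plain type assertion) instead.

package main

import (
	"errors"
	"fmt"
)

// FetchMessageError mirrors the type added above; redeclared here only so this
// sketch compiles standalone.
type FetchMessageError struct {
	Err error
}

func (e FetchMessageError) Error() string {
	return fmt.Sprintf("failed to fetch message: %v", e.Err)
}

// fetch is a hypothetical stand-in for a consumer read that fails.
func fetch() error {
	return FetchMessageError{Err: errors.New("broker unavailable")}
}

func main() {
	err := fetch()

	// Detect the wrapper with a type assertion, mirroring IsFetchMessageError.
	if _, ok := err.(FetchMessageError); ok {
		fmt.Println("fetch failed:", err)
	}
}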
Changes to the consumer flush logic:
@@ -10,6 +10,7 @@ import (
	"github.com/artie-labs/transfer/lib/cdc"
	"github.com/artie-labs/transfer/lib/config"
	"github.com/artie-labs/transfer/lib/destination"
+	"github.com/artie-labs/transfer/lib/kafkalib"
	"github.com/artie-labs/transfer/lib/retry"
	"github.com/artie-labs/transfer/lib/stringutil"
	"github.com/artie-labs/transfer/lib/telemetry/metrics/base"
@@ -42,6 +43,16 @@ func Flush(ctx context.Context, inMemDB *models.DatabaseData, dest destination.B
		}
	}

+	topicsToConsumerProvider := make(map[string]*kafkalib.ConsumerProvider)
+	for topic := range topicToTables {
+		consumer, err := kafkalib.GetConsumerFromContext(ctx, topic)
+		if err != nil {
+			return fmt.Errorf("failed to get consumer from context: %w", err)
+		}
+
+		topicsToConsumerProvider[topic] = consumer
+	}
+
	// Flush will take everything in memory and call the destination to create temp tables.
	var wg sync.WaitGroup
	for topic, tables := range topicToTables {
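GetConsumerFromContext is not part of this diff; a minimal sketch of the context-based lookup it implies follows. Only GetConsumerFromContext and ConsumerProvider are names from the diff; the registration helper, the context key, and the struct fields are assumptions made so the sketch compiles.

package kafkalib

import (
	"context"
	"fmt"
)

// ConsumerProvider wraps a Kafka consumer; fields elided (hypothetical).
type ConsumerProvider struct{}

// consumersKey is a hypothetical private context key.
type consumersKey struct{}

// InjectConsumersIntoContext stores a topic -> consumer map on the context (hypothetical helper).
func InjectConsumersIntoContext(ctx context.Context, consumers map[string]*ConsumerProvider) context.Context {
	return context.WithValue(ctx, consumersKey{}, consumers)
}

// GetConsumerFromContext returns the consumer registered for a topic.
func GetConsumerFromContext(ctx context.Context, topic string) (*ConsumerProvider, error) {
	consumers, ok := ctx.Value(consumersKey{}).(map[string]*ConsumerProvider)
	if !ok {
		return nil, fmt.Errorf("no consumers stored in context")
	}

	consumer, ok := consumers[topic]
	if !ok {
		return nil, fmt.Errorf("consumer not found for topic %q", topic)
	}

	return consumer, nil
}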
@@ -50,61 +61,68 @@ func Flush(ctx context.Context, inMemDB *models.DatabaseData, dest destination.B
			continue
		}

-		for _, tableData := range tables {
-			wg.Add(1)
-			go func(_tableData *models.TableData) {
-				defer wg.Done()
-
-				if args.CoolDown != nil && _tableData.ShouldSkipFlush(*args.CoolDown) {
-					slog.Debug("Skipping flush because we are currently in a flush cooldown", slog.String("tableID", _tableData.GetTableID().String()))
-					return
-				}
-
-				retryCfg, err := retry.NewJitterRetryConfig(1_000, 30_000, 15, retry.AlwaysRetry)
-				if err != nil {
-					slog.Error("Failed to create retry config", slog.Any("err", err))
-					return
-				}
-
-				_tableData.Lock()
-				defer _tableData.Unlock()
-				if _tableData.Empty() {
-					return
-				}
-
-				action := "merge"
-				if _tableData.Mode() == config.History {
-					action = "append"
-				}
-
-				start := time.Now()
-				tags := map[string]string{
-					"mode": _tableData.Mode().String(),
-					"table": _tableData.GetTableID().Table,
-					"database": _tableData.TopicConfig().Database,
-					"schema": _tableData.TopicConfig().Schema,
-					"reason": args.Reason,
-				}
-
-				what, err := retry.WithRetriesAndResult(retryCfg, func(_ int, _ error) (string, error) {
-					return flush(ctx, dest, _tableData, action, inMemDB.ClearTableConfig)
-				})
-
-				if err != nil {
-					slog.Error(fmt.Sprintf("Failed to %s", action), slog.Any("err", err), slog.String("tableID", _tableData.GetTableID().String()))
-				}
-
-				tags["what"] = what
-				metricsClient.Timing("flush", time.Since(start), tags)
-			}(tableData)
-		}
+		consumer, ok := topicsToConsumerProvider[topic]
+		if !ok {
+			return fmt.Errorf("consumer not found for topic %q", topic)
+		}
+
+		consumer.LockAndProcess(ctx, args.Topic == "", func() error {
+			for _, tableData := range tables {
+				wg.Add(1)
+				go func(_tableData *models.TableData) {
+					defer wg.Done()
+
+					if args.CoolDown != nil && _tableData.ShouldSkipFlush(*args.CoolDown) {
+						slog.Debug("Skipping flush because we are currently in a flush cooldown", slog.String("tableID", _tableData.GetTableID().String()))
+						return
+					}
+
+					retryCfg, err := retry.NewJitterRetryConfig(1_000, 30_000, 15, retry.AlwaysRetry)
+					if err != nil {
+						slog.Error("Failed to create retry config", slog.Any("err", err))
+						return
+					}
+
+					if _tableData.Empty() {
+						return
+					}
+
+					action := "merge"
+					if _tableData.Mode() == config.History {
+						action = "append"
+					}
+
+					start := time.Now()
+					tags := map[string]string{
+						"mode": _tableData.Mode().String(),
+						"table": _tableData.GetTableID().Table,
+						"database": _tableData.TopicConfig().Database,
+						"schema": _tableData.TopicConfig().Schema,
+						"reason": args.Reason,
+					}
+
+					what, err := retry.WithRetriesAndResult(retryCfg, func(_ int, _ error) (string, error) {
+						return flush(ctx, dest, _tableData, action, inMemDB.ClearTableConfig, consumer)
+					})
+
+					if err != nil {
+						slog.Error(fmt.Sprintf("Failed to %s", action), slog.Any("err", err), slog.String("tableID", _tableData.GetTableID().String()))
+					}
+
+					tags["what"] = what
+					metricsClient.Timing("flush", time.Since(start), tags)
+				}(tableData)
+			}
+
+			return nil
+		})
	}

	wg.Wait()
	return nil
}

-func flush(ctx context.Context, dest destination.Baseline, _tableData *models.TableData, action string, clearTableConfig func(cdc.TableID)) (string, error) {
+func flush(ctx context.Context, dest destination.Baseline, _tableData *models.TableData, action string, clearTableConfig func(cdc.TableID), consumer *kafkalib.ConsumerProvider) (string, error) {
	// This is added so that we have a new temporary table suffix for each merge / append.
	_tableData.ResetTempTableSuffix()

Review comment (on the flush goroutine above):
Bug: Concurrency Issues in TableData Handling. The removal of …
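LockAndProcess is likewise not shown in this hunk. The call site suggests it serializes the flush (and the offset commits inside it) against the consumer; a minimal sketch of that pattern, assuming a mutex-guarded callback, is below. Only the method name comes from the diff; the field and parameter names are guesses.

package kafkalib

import (
	"context"
	"sync"
)

// ConsumerProvider guards access to the underlying consumer (fields hypothetical).
type ConsumerProvider struct {
	mu sync.Mutex
}

// LockAndProcess optionally takes the consumer lock and then runs process, so that
// offset commits inside process cannot race with concurrent fetches or other flushes.
func (c *ConsumerProvider) LockAndProcess(_ context.Context, shouldLock bool, process func() error) error {
	if shouldLock {
		c.mu.Lock()
		defer c.mu.Unlock()
	}

	return process()
}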
@@ -122,8 +140,10 @@ func flush(ctx context.Context, dest destination.Baseline, _tableData *models.Ta
	}

	if commitTransaction {
-		if err = commitOffset(ctx, _tableData.TopicConfig().Topic, _tableData.PartitionsToLastMessage); err != nil {
-			return "commit_fail", fmt.Errorf("failed to commit kafka offset: %w", err)
+		for _, msg := range _tableData.PartitionsToLastMessage {
+			if err = consumer.CommitMessage(ctx, msg.GetMessage()); err != nil {
+				return "commit_fail", fmt.Errorf("failed to commit kafka offset: %w", err)
+			}
		}

		slog.Info(fmt.Sprintf("%s success, clearing memory...", stringutil.CapitalizeFirstLetter(action)), slog.String("tableID", _tableData.GetTableID().String()))
Review comment: Peek?
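For reference, a sketch of the per-partition commit step that flush now performs, written directly against segmentio/kafka-go. Treating the underlying client as kafka-go, as well as the map key type, is an assumption; in the PR this goes through ConsumerProvider.CommitMessage, whose internals are not shown here.

package kafkalib

import (
	"context"
	"fmt"

	"github.com/segmentio/kafka-go"
)

// commitLastMessages commits the last message seen for each partition after a
// successful merge/append, so the consumer group offset only advances once the
// data has landed in the destination.
func commitLastMessages(ctx context.Context, reader *kafka.Reader, partitionsToLastMessage map[int]kafka.Message) error {
	for _, msg := range partitionsToLastMessage {
		if err := reader.CommitMessages(ctx, msg); err != nil {
			return fmt.Errorf("failed to commit kafka offset: %w", err)
		}
	}

	return nil
}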