Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmd/epp/runner/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ func (r *Runner) Run(ctx context.Context) error {
return fmt.Errorf("failed to initialize Flow Controller: %w", err)
}
go registry.Run(ctx)
admissionController = requestcontrol.NewFlowControlAdmissionController(fc)
admissionController = requestcontrol.NewFlowControlAdmissionController(fc, opts.PoolName)
Copy link
Contributor Author

@LukeAVanDrie LukeAVanDrie Dec 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could also get the pool name from the data layer via PoolGet (e.g., in requestcontrol/director.go or handlers/server.go). My understanding, though, is that the pool name is a static value, so passing it in here should be sufficient and avoids a lookup.

} else {
setupLog.Info("Experimental Flow Control layer is disabled, using legacy admission control")
admissionController = requestcontrol.NewLegacyAdmissionController(saturationDetector, locator)
Expand Down
11 changes: 8 additions & 3 deletions pkg/epp/flowcontrol/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,10 +212,15 @@ func (fc *FlowController) EnqueueAndWait(
req types.FlowControlRequest,
) (types.QueueOutcome, error) {
flowKey := req.FlowKey()
fairnessID := flowKey.ID
priority := strconv.Itoa(flowKey.Priority)
metrics.IncFlowControlQueueSize(fairnessID, priority)
defer metrics.DecFlowControlQueueSize(fairnessID, priority)
metrics.IncFlowControlQueueSize(
flowKey.ID, priority,
req.InferencePoolName(),
req.ModelName(), req.TargetModelName())
defer metrics.DecFlowControlQueueSize(
flowKey.ID, priority,
req.InferencePoolName(),
req.ModelName(), req.TargetModelName())

// 1. Create the derived context that governs this request's lifecycle (Parent Cancellation + TTL).
reqCtx, cancel, enqueueTime := fc.createRequestContext(ctx, req)
Expand Down
6 changes: 5 additions & 1 deletion pkg/epp/flowcontrol/controller/internal/item.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,11 @@ func (fi *FlowItem) finalizeInternal(outcome types.QueueOutcome, err error) {

duration := time.Since(fi.enqueueTime)
flowKey := fi.originalRequest.FlowKey()
metrics.RecordFlowControlRequestQueueDuration(flowKey.ID, strconv.Itoa(flowKey.Priority), outcome.String(), duration)
metrics.RecordFlowControlRequestQueueDuration(
flowKey.ID, strconv.Itoa(flowKey.Priority), outcome.String(),
fi.originalRequest.InferencePoolName(),
fi.OriginalRequest().ModelName(), fi.OriginalRequest().TargetModelName(),
duration)

fi.done <- finalState
close(fi.done)
Expand Down
50 changes: 47 additions & 3 deletions pkg/epp/flowcontrol/types/mocks/mocks.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,27 +31,64 @@ type MockFlowControlRequest struct {
InitialEffectiveTTLV time.Duration
IDV string
MetadataV map[string]any
InferencePoolNameV string
ModelNameV string
TargetModelNameV string
}

// NewMockFlowControlRequest creates a new MockFlowControlRequest instance.
// MockRequestOption is a functional option for configuring a MockFlowControlRequest.
// Each option receives a pointer to the mock and mutates it in place; the mock constructors accept a variadic list of
// these options so tests can set optional fields without additional positional parameters.
type MockRequestOption func(*MockFlowControlRequest)

// WithInferencePoolName returns a MockRequestOption that stores name in the mock request's InferencePoolNameV field.
func WithInferencePoolName(name string) MockRequestOption {
	setPoolName := func(req *MockFlowControlRequest) {
		req.InferencePoolNameV = name
	}
	return setPoolName
}

// WithModelName returns a MockRequestOption that stores name in the mock request's ModelNameV field.
func WithModelName(name string) MockRequestOption {
	setModelName := func(req *MockFlowControlRequest) {
		req.ModelNameV = name
	}
	return setModelName
}

// WithTargetModelName returns a MockRequestOption that stores name in the mock request's TargetModelNameV field.
func WithTargetModelName(name string) MockRequestOption {
	setTargetModelName := func(req *MockFlowControlRequest) {
		req.TargetModelNameV = name
	}
	return setTargetModelName
}

// NewMockFlowControlRequest creates a new MockFlowControlRequest instance with optional configuration.
// The byteSize, id, and key arguments populate ByteSizeV, IDV, and FlowKeyV respectively, and MetadataV is initialized
// to an empty, non-nil map. Each option in opts is then applied to the mock, in the order given, before it is returned.
func NewMockFlowControlRequest(
byteSize uint64,
id string,
key types.FlowKey,
opts ...MockRequestOption,
) *MockFlowControlRequest {
return &MockFlowControlRequest{
m := &MockFlowControlRequest{
ByteSizeV: byteSize,
IDV: id,
FlowKeyV: key,
MetadataV: make(map[string]any),
}

// Apply options after the base fields are set so an option can overwrite any of them.
for _, opt := range opts {
opt(m)
}

return m
}

// FlowKey returns the FlowKeyV value configured on the mock.
func (m *MockFlowControlRequest) FlowKey() types.FlowKey {
	return m.FlowKeyV
}

// ByteSize returns the ByteSizeV value configured on the mock.
func (m *MockFlowControlRequest) ByteSize() uint64 {
	return m.ByteSizeV
}

// InitialEffectiveTTL returns the InitialEffectiveTTLV value configured on the mock.
func (m *MockFlowControlRequest) InitialEffectiveTTL() time.Duration {
	return m.InitialEffectiveTTLV
}

// ID returns the IDV value configured on the mock.
func (m *MockFlowControlRequest) ID() string {
	return m.IDV
}

// GetMetadata returns the MetadataV map configured on the mock; the map itself is returned, not a copy.
func (m *MockFlowControlRequest) GetMetadata() map[string]any {
	return m.MetadataV
}

// InferencePoolName returns the InferencePoolNameV value configured on the mock.
func (m *MockFlowControlRequest) InferencePoolName() string {
	return m.InferencePoolNameV
}

// ModelName returns the ModelNameV value configured on the mock.
func (m *MockFlowControlRequest) ModelName() string {
	return m.ModelNameV
}

// TargetModelName returns the TargetModelNameV value configured on the mock.
func (m *MockFlowControlRequest) TargetModelName() string {
	return m.TargetModelNameV
}

var _ types.FlowControlRequest = &MockFlowControlRequest{}

Expand Down Expand Up @@ -92,13 +129,20 @@ var _ types.QueueItemAccessor = &MockQueueItemAccessor{}

// NewMockQueueItemAccessor is a constructor for `MockQueueItemAccessor` that initializes the mock with a default
// `MockFlowControlRequest` and `MockQueueItemHandle` to prevent nil pointer dereferences in tests.
func NewMockQueueItemAccessor(byteSize uint64, reqID string, key types.FlowKey) *MockQueueItemAccessor {
// It accepts MockRequestOptions to configure the underlying request.
func NewMockQueueItemAccessor(
byteSize uint64,
reqID string,
key types.FlowKey,
opts ...MockRequestOption,
) *MockQueueItemAccessor {
return &MockQueueItemAccessor{
EnqueueTimeV: time.Now(),
OriginalRequestV: NewMockFlowControlRequest(
byteSize,
reqID,
key,
opts...,
),
HandleV: &MockQueueItemHandle{},
}
Expand Down
12 changes: 12 additions & 0 deletions pkg/epp/flowcontrol/types/request.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,18 @@ type FlowControlRequest interface {
// This data is passed transparently to components like the contracts.PodLocator to resolve resources (candidate pods)
// lazily during the dispatch cycle.
GetMetadata() map[string]any

// --- Passthrough for Observability ---

// InferencePoolName returns the name of the backend pool this request is targeting.
// This is used for observability (metrics labeling) to correlate queue depth with specific backend pools.
InferencePoolName() string

// ModelName returns the name of the base model being requested (e.g., "llama-2-70b").
ModelName() string

// TargetModelName returns the name of the specific adapter or traffic target (e.g., "finance-lora-v1").
TargetModelName() string
}

// QueueItemHandle is an opaque handle to an item that has been successfully added to a `framework.SafeQueue`. It acts
Expand Down
Loading