diff --git a/generator/config/stage/scoreFusion.yaml b/generator/config/stage/scoreFusion.yaml new file mode 100644 index 000000000..8102e8f7a --- /dev/null +++ b/generator/config/stage/scoreFusion.yaml @@ -0,0 +1,77 @@ +# yaml-language-server: $schema=../schema.json +name: $scoreFusion +link: 'https://www.mongodb.com/docs/manual/reference/operator/aggregation/scoreFusion/' +type: + - stage +encode: object +description: | + Combines multiple pipelines using relative score fusion to create hybrid search results. +arguments: + - + name: input + type: + - object + description: | + An object with the following required fields: + - input.pipelines: Map from name to input pipeline. Each pipeline must be operating on the same collection. Minimum of one pipeline. + - input.normalization: Normalizes the score to the range 0 to 1 before combining the results. Value can be none, sigmoid or minMaxScaler. + - + name: combination + optional: true + type: + - object + description: | + An object with the following optional fields: + - combination.weights: Map from pipeline name to numbers (non-negative). If unspecified, default weight is 1 for each pipeline. + - combination.method: Specifies method for combining scores. Value can be avg or expression. Default is avg. + - combination.expression: This is the custom expression that is used when combination.method is set to expression. + - + name: scoreDetails + type: + - bool + default: false + description: Set to true to include detailed scoring information. # scoreDetails carries a default (false) instead of "optional: true"; the PHP signatures in this change mirror that as "bool $scoreDetails = false", while combination defaults to Optional::Undefined
+tests: + - + name: 'Example' + link: 'https://www.mongodb.com/docs/manual/reference/operator/aggregation/scoreFusion/#examples' + pipeline: + - + $scoreFusion: + input: + pipelines: + searchOne: + - + $vectorSearch: + index: 'vector_index' + path: 'plot_embedding' + queryVector: [-0.0016261312, -0.028070757, -0.011342932] + numCandidates: 150 + limit: 10 + searchTwo: + - + $search: + index: '' + text: + query: '' + path: '' + normalization: 'sigmoid' + combination: + method: 'expression' + expression: + $sum: + - + $multiply: + - '$$searchOne' + - 10 + - '$$searchTwo' + scoreDetails: true + - + $project: + _id: 1 + title: 1 + plot: 1 + scoreDetails: + $meta: 'scoreDetails' + - + $limit: 20 diff --git a/src/Builder/Stage/FactoryTrait.php b/src/Builder/Stage/FactoryTrait.php index df5860ded..0edcf317e 100644 --- a/src/Builder/Stage/FactoryTrait.php +++ b/src/Builder/Stage/FactoryTrait.php @@ -534,6 +534,27 @@ public static function sample(int $size): SampleStage return new SampleStage($size); } + /** + * Combines multiple pipelines using relative score fusion to create hybrid search results. + * + * @see https://www.mongodb.com/docs/manual/reference/operator/aggregation/scoreFusion/ + * @param Document|Serializable|array|stdClass $input An object with the following required fields: + * - input.pipelines: Map from name to input pipeline. Each pipeline must be operating on the same collection. Minimum of one pipeline. + * - input.normalization: Normalizes the score to the range 0 to 1 before combining the results. Value can be none, sigmoid or minMaxScaler. + * @param bool $scoreDetails Set to true to include detailed scoring information. + * @param Optional|Document|Serializable|array|stdClass $combination An object with the following optional fields: + * - combination.weights: Map from pipeline name to numbers (non-negative). If unspecified, default weight is 1 for each pipeline. + * - combination.method: Specifies method for combining scores. 
Value can be avg or expression. Default is avg. + * - combination.expression: This is the custom expression that is used when combination.method is set to expression. + */ + public static function scoreFusion( + Document|Serializable|stdClass|array $input, + bool $scoreDetails = false, + Optional|Document|Serializable|stdClass|array $combination = Optional::Undefined, + ): ScoreFusionStage { /* Thin factory: passes the three arguments through unchanged, in the same order, to the ScoreFusionStage constructor. */ + return new ScoreFusionStage($input, $scoreDetails, $combination); + } + /** * Performs a full-text search of the field or fields in an Atlas collection. * NOTE: $search is only available for MongoDB Atlas clusters, and is not available for self-managed deployments. diff --git a/src/Builder/Stage/FluentFactoryTrait.php b/src/Builder/Stage/FluentFactoryTrait.php index 1b9e542ac..729ef65f1 100644 --- a/src/Builder/Stage/FluentFactoryTrait.php +++ b/src/Builder/Stage/FluentFactoryTrait.php @@ -603,6 +603,29 @@ public function sample(int $size): static return $this; } + /** + * Combines multiple pipelines using relative score fusion to create hybrid search results. + * + * @see https://www.mongodb.com/docs/manual/reference/operator/aggregation/scoreFusion/ + * @param Document|Serializable|array|stdClass $input An object with the following required fields: + * - input.pipelines: Map from name to input pipeline. Each pipeline must be operating on the same collection. Minimum of one pipeline. + * - input.normalization: Normalizes the score to the range 0 to 1 before combining the results. Value can be none, sigmoid or minMaxScaler. + * @param bool $scoreDetails Set to true to include detailed scoring information. + * @param Optional|Document|Serializable|array|stdClass $combination An object with the following optional fields: + * - combination.weights: Map from pipeline name to numbers (non-negative). If unspecified, default weight is 1 for each pipeline. + * - combination.method: Specifies method for combining scores. Value can be avg or expression. Default is avg. 
+ * - combination.expression: This is the custom expression that is used when combination.method is set to expression. + */ + public function scoreFusion( + Document|Serializable|stdClass|array $input, + bool $scoreDetails = false, + Optional|Document|Serializable|stdClass|array $combination = Optional::Undefined, + ): static { /* Fluent variant: appends the stage built by Stage::scoreFusion() to $this->pipeline, then returns $this so calls can be chained. */ + $this->pipeline[] = Stage::scoreFusion($input, $scoreDetails, $combination); + + return $this; + } + /** * Performs a full-text search of the field or fields in an Atlas collection. * NOTE: $search is only available for MongoDB Atlas clusters, and is not available for self-managed deployments. diff --git a/src/Builder/Stage/ScoreFusionStage.php b/src/Builder/Stage/ScoreFusionStage.php new file mode 100644 index 000000000..5595ac9e5 --- /dev/null +++ b/src/Builder/Stage/ScoreFusionStage.php @@ -0,0 +1,68 @@ + 'input', 'scoreDetails' => 'scoreDetails', 'combination' => 'combination']; + + /** + * @var Document|Serializable|array|stdClass $input An object with the following required fields: + * - input.pipelines: Map from name to input pipeline. Each pipeline must be operating on the same collection. Minimum of one pipeline. + * - input.normalization: Normalizes the score to the range 0 to 1 before combining the results. Value can be none, sigmoid or minMaxScaler. + */ + public readonly Document|Serializable|stdClass|array $input; + + /** @var bool $scoreDetails Set to true to include detailed scoring information. */ + public readonly bool $scoreDetails; + + /** + * @var Optional|Document|Serializable|array|stdClass $combination An object with the following optional fields: + * - combination.weights: Map from pipeline name to numbers (non-negative). If unspecified, default weight is 1 for each pipeline. + * - combination.method: Specifies method for combining scores. Value can be avg or expression. Default is avg. + * - combination.expression: This is the custom expression that is used when combination.method is set to expression. 
+ */ + public readonly Optional|Document|Serializable|stdClass|array $combination; /* Holds Optional::Undefined (the constructor default) when the caller does not pass a combination. */ + + /** + * @param Document|Serializable|array|stdClass $input An object with the following required fields: + * - input.pipelines: Map from name to input pipeline. Each pipeline must be operating on the same collection. Minimum of one pipeline. + * - input.normalization: Normalizes the score to the range 0 to 1 before combining the results. Value can be none, sigmoid or minMaxScaler. + * @param bool $scoreDetails Set to true to include detailed scoring information. + * @param Optional|Document|Serializable|array|stdClass $combination An object with the following optional fields: + * - combination.weights: Map from pipeline name to numbers (non-negative). If unspecified, default weight is 1 for each pipeline. + * - combination.method: Specifies method for combining scores. Value can be avg or expression. Default is avg. + * - combination.expression: This is the custom expression that is used when combination.method is set to expression. 
+ */ + public function __construct( + Document|Serializable|stdClass|array $input, + bool $scoreDetails = false, + Optional|Document|Serializable|stdClass|array $combination = Optional::Undefined, + ) { /* Stores each argument as-is in the matching readonly property; no validation or normalisation is done here. */ + $this->input = $input; + $this->scoreDetails = $scoreDetails; + $this->combination = $combination; + } +} diff --git a/tests/Builder/Stage/Pipelines.php b/tests/Builder/Stage/Pipelines.php index 81f5b8fe3..15525996a 100644 --- a/tests/Builder/Stage/Pipelines.php +++ b/tests/Builder/Stage/Pipelines.php @@ -2554,6 +2554,99 @@ enum Pipelines: string ] JSON; + /** + * Example + * + * @see https://www.mongodb.com/docs/manual/reference/operator/aggregation/scoreFusion/#examples + */ + case ScoreFusionExample = <<<'JSON' + [ + { + "$scoreFusion": { + "input": { + "pipelines": { + "searchOne": [ + { + "$vectorSearch": { + "index": "vector_index", + "path": "plot_embedding", + "queryVector": [ + { + "$numberDouble": "-0.0016261311999999999121" + }, + { + "$numberDouble": "-0.028070756999999998266" + }, + { + "$numberDouble": "-0.011342932000000000015" + } + ], + "numCandidates": { + "$numberInt": "150" + }, + "limit": { + "$numberInt": "10" + } + } + } + ], + "searchTwo": [ + { + "$search": { + "index": "", + "text": { + "query": "", + "path": "" + } + } + } + ] + }, + "normalization": "sigmoid" + }, + "combination": { + "method": "expression", + "expression": { + "$sum": [ + { + "$multiply": [ + "$$searchOne", + { + "$numberInt": "10" + } + ] + }, + "$$searchTwo" + ] + } + }, + "scoreDetails": true + } + }, + { + "$project": { + "_id": { + "$numberInt": "1" + }, + "title": { + "$numberInt": "1" + }, + "plot": { + "$numberInt": "1" + }, + "scoreDetails": { + "$meta": "scoreDetails" + } + } + }, + { + "$limit": { + "$numberInt": "20" + } + } + ] + JSON; + /** * Example * diff --git a/tests/Builder/Stage/ScoreFusionStageTest.php b/tests/Builder/Stage/ScoreFusionStageTest.php new file mode 100644 index 000000000..d727cae6f --- /dev/null +++ 
b/tests/Builder/Stage/ScoreFusionStageTest.php @@ -0,0 +1,68 @@ + [ + 'searchOne' => new Pipeline( + Stage::vectorSearch( + index: 'vector_index', + path: 'plot_embedding', + queryVector: [-0.0016261312, -0.028070757, -0.011342932], + numCandidates: 150, + limit: 10, + ), + ), + 'searchTwo' => new Pipeline( + Stage::search( + Search::text( + query: '', + path: '', + ), + index: '', + ), + ), + ], + 'normalization' => 'sigmoid', + ], + combination: [ + 'method' => 'expression', + 'expression' => Expression::sum( + Expression::multiply( + Expression::variable('searchOne'), + 10, + ), + Expression::variable('searchTwo'), + ), + ], + scoreDetails: true, /* combination and scoreDetails are passed as named arguments, so their order at this call site is independent of the declared parameter order. */ + ), + Stage::project( + _id: 1, + title: 1, + plot: 1, + scoreDetails: Expression::meta('scoreDetails'), + ), + Stage::limit(20), + ); + + $this->assertSamePipeline(Pipelines::ScoreFusionExample, $pipeline); /* Checks the pipeline built above against the Pipelines::ScoreFusionExample fixture; the comparison itself lives in assertSamePipeline, which is not shown in this change. */ + } +}