Skip to content

Commit faa20cb

Browse files
Integrate validation into Bundle with comprehensive error tracking
Major update to Bundle that integrates all validation features:

Bundle struct changes:
- Added parse_errors and validation_errors fields to track errors

Bundle::new changes:
- Added validate_references parameter (defaults to true)
- Collects ALL errors instead of failing on first error
- Detects parse errors, duplicates, unknown references, and cycles
- Only raises exception in strict mode after collecting all errors
- Returns Bundle with error info even in non-strict mode

New methods:
- get_parse_errors(): Returns syntax errors from FTL parsing
- get_validation_errors(): Returns semantic errors (refs, cycles, etc.)
- get_all_compile_errors(): Returns combined parse + validation errors
- get_required_variables(): Lists variables used by a message
- get_compile_errors(): Deprecated, kept for backward compatibility

get_translation changes:
- Added optional errors parameter to collect format-time errors
- Validates variable types and detects missing variables
- Reports errors via provided list instead of failing silently

This is the main integration point that makes the library production-ready.
1 parent 621ff4f commit faa20cb

File tree

1 file changed

+233
-18
lines changed

1 file changed

+233
-18
lines changed

src/lib.rs

Lines changed: 233 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -708,13 +708,21 @@ mod rustfluent {
708708
// Bundle: exposed to Python via #[pyclass]. Holds the underlying
// FluentBundle together with the two compile-time error lists that
// Bundle::new fills in (parse errors and validation errors).
#[pyclass]
709709
struct Bundle {
710710
bundle: FluentBundle<FluentResource>,
711+
// Separated compile-time errors by type for clarity
712+
parse_errors: Vec<ParseErrorDetail>, // Syntax errors from FTL parsing
713+
validation_errors: Vec<ValidationError>, // Semantic errors (refs, cycles, etc.)
711714
}
712715

713716
#[pymethods]
714717
impl Bundle {
715718
#[new]
716-
#[pyo3(signature = (language, ftl_filenames, strict=false))]
717-
fn new(language: &str, ftl_filenames: Vec<PathBuf>, strict: bool) -> PyResult<Self> {
719+
#[pyo3(signature = (language, ftl_filenames, strict=false, validate_references=true))]
720+
fn new(
721+
language: &str,
722+
ftl_filenames: Vec<PathBuf>,
723+
strict: bool,
724+
validate_references: bool,
725+
) -> PyResult<Self> {
718726
let langid: LanguageIdentifier = match language.parse() {
719727
Ok(langid) => langid,
720728
Err(_) => {
@@ -724,40 +732,200 @@ mod rustfluent {
724732
}
725733
};
726734
let mut bundle = FluentBundle::new_concurrent(vec![langid]);
735+
// Separate error collections by type
736+
let mut all_parse_errors = Vec::new();
737+
let mut all_validation_errors = Vec::new();
738+
// Track all terms across all resources for validation
739+
let mut all_terms: HashMap<String, TermInfo> = HashMap::new();
727740

728741
for file_path in ftl_filenames.iter() {
729742
let contents = fs::read_to_string(file_path)
730743
.map_err(|_| PyFileNotFoundError::new_err(file_path.clone()))?;
731744

732745
let resource = match FluentResource::try_new(contents) {
733746
Ok(resource) => resource,
734-
Err((resource, errors)) if strict => {
735-
let mut labels = Vec::with_capacity(errors.len());
747+
Err((resource, errors)) => {
748+
// CHANGED: Always collect parse errors (even in strict mode)
749+
// Don't raise immediately - collect all errors first
750+
let source = resource.source();
751+
let filename_str = file_path.to_string_lossy().to_string();
752+
753+
// Create structured error details for programmatic access
736754
for error in errors {
737-
labels.push(LabeledSpan::at(error.pos, format!("{}", error.kind)))
755+
all_parse_errors.push(ParseErrorDetail::from_parser_error(
756+
error.clone(),
757+
source,
758+
Some(filename_str.clone()),
759+
));
738760
}
739-
let error = miette!(
740-
labels = labels,
741-
"Error when parsing {}",
742-
file_path.to_string_lossy()
743-
)
744-
.with_source_code(resource.source().to_string());
745-
return Err(ParserError::new_err(format!("{error:?}")));
761+
762+
// Continue processing to collect more errors
763+
resource
746764
}
747-
Err((resource, _errors)) => resource,
748765
};
766+
767+
// Check for duplicates manually before adding
768+
// Need to detect duplicates both within this file and against existing bundle
769+
use fluent_syntax::ast;
770+
use std::collections::HashSet;
771+
let mut seen_in_file = HashSet::new();
772+
773+
for entry in resource.entries() {
774+
let (kind, id) = match entry {
775+
ast::Entry::Message(msg) => ("message", msg.id.name),
776+
ast::Entry::Term(term) => ("term", term.id.name),
777+
_ => continue,
778+
};
779+
780+
// Check if this message/term already exists in bundle or was seen in this file
781+
let full_id = if kind == "term" {
782+
format!("-{}", id)
783+
} else {
784+
id.to_string()
785+
};
786+
787+
// For terms, check our term index instead of bundle.has_message
788+
let exists_in_bundle = if kind == "term" {
789+
all_terms.contains_key(&full_id)
790+
} else {
791+
bundle.has_message(&full_id)
792+
};
793+
let exists_in_file = !seen_in_file.insert(full_id.clone());
794+
795+
if exists_in_bundle || exists_in_file {
796+
let validation_err = ValidationError {
797+
error_type: "DuplicateMessageId".to_string(),
798+
message: format!(
799+
"Duplicate {}: '{}'. Later definition will override.",
800+
kind, id
801+
),
802+
message_id: Some(id.to_string()),
803+
reference: None,
804+
};
805+
806+
// Don't raise immediately, collect errors
807+
all_validation_errors.push(validation_err);
808+
}
809+
}
810+
811+
// Collect terms from this resource
812+
let current_resource_terms = collect_terms_from_resource(&resource);
813+
814+
// Check references and cycles BEFORE adding if validation is enabled
815+
if validate_references {
816+
// Merge current resource terms with all previously seen terms for validation
817+
let mut available_terms = all_terms.clone();
818+
available_terms.extend(current_resource_terms.clone());
819+
820+
// Check if references in this resource exist in current bundle or available terms
821+
let ref_errors = check_references(&resource, &bundle, &available_terms);
822+
all_validation_errors.extend(ref_errors);
823+
824+
// Check for cycles within this resource
825+
let cycle_errors = detect_cycles(&resource);
826+
all_validation_errors.extend(cycle_errors);
827+
}
828+
829+
// Add terms from this resource to the cumulative term index
830+
all_terms.extend(current_resource_terms);
831+
832+
// Add the resource (will override duplicates)
749833
bundle.add_resource_overriding(resource);
750834
}
751835

752-
Ok(Self { bundle })
836+
// Check strict mode AFTER collecting all errors
837+
if strict && (!all_parse_errors.is_empty() || !all_validation_errors.is_empty()) {
838+
return Err(create_comprehensive_error(
839+
&all_parse_errors,
840+
&all_validation_errors,
841+
));
842+
}
843+
844+
Ok(Self {
845+
bundle,
846+
parse_errors: all_parse_errors,
847+
validation_errors: all_validation_errors,
848+
})
849+
}
850+
851+
/// Get all parse errors (syntax errors from FTL parsing)
852+
fn get_parse_errors(&self) -> Vec<ParseErrorDetail> {
853+
self.parse_errors.clone()
854+
}
855+
856+
/// Get all validation errors (semantic errors: unknown refs, cycles, etc.)
857+
fn get_validation_errors(&self) -> Vec<ValidationError> {
858+
self.validation_errors.clone()
859+
}
860+
861+
/// Get ALL compile-time errors in one call (parse + validation)
862+
/// Returns list of tuples: (error_category: str, error: Union[ParseErrorDetail, ValidationError])
863+
fn get_all_compile_errors(&self, py: Python) -> PyResult<Vec<(String, PyObject)>> {
864+
let mut all_errors = Vec::new();
865+
866+
// Add parse errors with "parse" tag
867+
for err in &self.parse_errors {
868+
let py_err = Py::new(py, err.clone())?;
869+
all_errors.push(("parse".to_string(), py_err.into()));
870+
}
871+
872+
// Add validation errors with "validation" tag
873+
for err in &self.validation_errors {
874+
let py_err = Py::new(py, err.clone())?;
875+
all_errors.push(("validation".to_string(), py_err.into()));
876+
}
877+
878+
Ok(all_errors)
879+
}
880+
881+
/// DEPRECATED: Use get_validation_errors() instead
882+
/// Kept for backward compatibility
883+
fn get_compile_errors(&self) -> Vec<ValidationError> {
884+
self.validation_errors.clone()
753885
}
754886

755-
#[pyo3(signature = (identifier, variables=None, use_isolating=true))]
887+
/// Get list of variable names used by a specific message
888+
///
889+
/// # Arguments
890+
/// * `identifier` - Message ID (e.g., "hello" or "hello.attribute")
891+
///
892+
/// # Returns
893+
/// Sorted list of variable names (e.g., ["count", "user"])
894+
fn get_required_variables(&self, identifier: &str) -> PyResult<Vec<String>> {
895+
let get_message = |id: &str| {
896+
self.bundle
897+
.get_message(id)
898+
.ok_or_else(|| PyValueError::new_err(format!("{id} not found")))
899+
};
900+
901+
let pattern = match identifier.split_once('.') {
902+
Some((message_id, attribute_id)) => get_message(message_id)?
903+
.get_attribute(attribute_id)
904+
.ok_or_else(|| {
905+
PyValueError::new_err(format!(
906+
"Attribute '{attribute_id}' not found on message '{message_id}'"
907+
))
908+
})?
909+
.value(),
910+
None => get_message(identifier)?.value().ok_or_else(|| {
911+
PyValueError::new_err(format!("{identifier} - Message has no value."))
912+
})?,
913+
};
914+
915+
let vars = extract_variable_references(pattern);
916+
let mut vars_vec: Vec<String> = vars.into_iter().collect();
917+
vars_vec.sort(); // Sort for deterministic output
918+
919+
Ok(vars_vec)
920+
}
921+
922+
#[pyo3(signature = (identifier, variables=None, use_isolating=true, errors=None))]
756923
pub fn get_translation(
757924
&mut self,
758925
identifier: &str,
759926
variables: Option<&Bound<'_, PyDict>>,
760927
use_isolating: bool,
928+
errors: Option<&Bound<'_, PyList>>,
761929
) -> PyResult<String> {
762930
self.bundle.set_use_isolating(use_isolating);
763931

@@ -785,7 +953,12 @@ mod rustfluent {
785953
})?
786954
};
787955

956+
// Extract all variables used by this pattern
957+
let required_vars = extract_variable_references(pattern);
958+
788959
let mut args = FluentArgs::new();
960+
let mut variable_errors = Vec::new();
961+
let mut provided_vars = HashSet::new();
789962

790963
if let Some(variables) = variables {
791964
for (python_key, python_value) in variables {
@@ -797,6 +970,7 @@ mod rustfluent {
797970
)));
798971
}
799972
let key = python_key.to_string();
973+
provided_vars.insert(key.clone());
800974
// Set the variable value as a string or integer,
801975
// raising a TypeError if not.
802976
if python_value.is_instance_of::<PyString>() {
@@ -811,17 +985,58 @@ mod rustfluent {
811985
args.set(key, chrono_date.format("%Y-%m-%d").to_string());
812986
} else {
813987
// The variable value was of an unsupported type.
814-
// Fall back to displaying the variable key as its value.
988+
// Collect error and fall back to displaying the variable key
989+
variable_errors.push(FormatError {
990+
error_type: "InvalidVariableType".to_string(),
991+
message: format!(
992+
"Variable '{}' has unsupported type, expected str/int/date. Using key as fallback.",
993+
key
994+
),
995+
message_id: Some(identifier.to_string()),
996+
variable_name: Some(key.clone()),
997+
expected_type: Some("str|int|date".to_string()),
998+
actual_type: Some(format!("{:?}", python_value.get_type().name())),
999+
});
8151000
let fallback_value = key.clone();
8161001
args.set(key, fallback_value);
8171002
}
8181003
}
8191004
}
8201005

821-
let mut errors = vec![];
1006+
// Check for missing variables
1007+
for required_var in &required_vars {
1008+
if !provided_vars.contains(required_var) {
1009+
variable_errors.push(FormatError {
1010+
error_type: "MissingVariable".to_string(),
1011+
message: format!("Unknown external: {}", required_var),
1012+
message_id: Some(identifier.to_string()),
1013+
variable_name: Some(required_var.clone()),
1014+
expected_type: None,
1015+
actual_type: None,
1016+
});
1017+
}
1018+
}
1019+
1020+
// Format the message and collect errors
1021+
let mut format_errors = vec![];
8221022
let value = self
8231023
.bundle
824-
.format_pattern(pattern, Some(&args), &mut errors);
1024+
.format_pattern(pattern, Some(&args), &mut format_errors);
1025+
1026+
// Convert and append all errors to the provided list
1027+
if let Some(error_list) = errors {
1028+
// Add variable errors (unsupported types and missing variables)
1029+
for var_err in variable_errors {
1030+
error_list.append(var_err).ok();
1031+
}
1032+
1033+
// Add format errors (cycles, unknown refs, etc.)
1034+
for format_err in format_errors {
1035+
let py_error = FormatError::from_fluent_error(&format_err);
1036+
error_list.append(py_error).ok();
1037+
}
1038+
}
1039+
8251040
Ok(value.to_string())
8261041
}
8271042
}

0 commit comments

Comments
 (0)