"""
Enhanced error introduction system for editing tasks.

This module provides error introduction capabilities for creating challenging
editing tasks with Pydantic models. It supports various error types, constraint
violations, and complex data structures.
"""

import copy
import random
import re
import string
import types
from enum import Enum
from typing import Any, Dict, List, Optional, Type, Union, get_args, get_origin

from pydantic import BaseModel, ValidationError
from pydantic.fields import FieldInfo

# ``X | None`` annotations (PEP 604) report ``types.UnionType`` as their origin
# instead of ``typing.Union``; the attribute is absent on Python < 3.10.
_PEP604_UNION = getattr(types, "UnionType", None)


class ErrorType(Enum):
    """Types of errors that can be introduced."""

    TYPE_ERROR = "type_error"
    CONSTRAINT_ERROR = "constraint_error"
    FORMAT_ERROR = "format_error"
    ENUM_ERROR = "enum_error"
    REQUIRED_FIELD_MISSING = "required_field_missing"
    EXTRA_FIELD = "extra_field"
    NESTED_ERROR = "nested_error"
    LIST_ERROR = "list_error"
    VALIDATOR_ERROR = "validator_error"


class ErrorIntroductionConfig:
    """Configuration for error introduction.

    Attributes:
        error_types: Error kinds that may be attempted.
        max_errors: Upper bound on the number of errors introduced per item.
        probability: Chance (0.0-1.0) that any error is introduced at all.
        seed: Optional seed applied to the global ``random`` module.
    """

    def __init__(
        self,
        error_types: Optional[List[ErrorType]] = None,
        max_errors: int = 1,
        probability: float = 1.0,
        seed: Optional[int] = None,
    ):
        if error_types is None:
            # Default set: the error kinds that do not depend on special model
            # features (extra=forbid, nested models, lists, custom validators).
            self.error_types = [
                ErrorType.TYPE_ERROR,
                ErrorType.CONSTRAINT_ERROR,
                ErrorType.FORMAT_ERROR,
                ErrorType.ENUM_ERROR,
                ErrorType.REQUIRED_FIELD_MISSING,
            ]
        else:
            self.error_types = error_types
        self.max_errors = max_errors
        self.probability = probability
        self.seed = seed

    @classmethod
    def from_env_config(
        cls,
        error_types_enabled: List[str],
        max_errors_per_item: int = 1,
        error_introduction_probability: float = 1.0,
        error_introduction_seed: Optional[int] = None,
    ):
        """Create config from environment config parameters.

        Names in ``error_types_enabled`` that do not correspond to an
        ``ErrorType`` value are silently ignored.
        """
        # Every ErrorType value is exactly its configuration name.
        error_type_mapping = {member.value: member for member in ErrorType}
        error_types = [
            error_type_mapping[name]
            for name in error_types_enabled
            if name in error_type_mapping
        ]
        return cls(
            error_types=error_types,
            max_errors=max_errors_per_item,
            probability=error_introduction_probability,
            seed=error_introduction_seed,
        )


def introduce_error_for_pydantic(
    data: Any,
    pydantic_model: Type[BaseModel],
    config: Optional[ErrorIntroductionConfig] = None,
    seed: Optional[int] = None,  # Backward compatibility
) -> Optional[Any]:
    """
    Introduce sophisticated errors into valid data for Pydantic model editing tasks.

    Each candidate mutation is verified by validating the mutated data against
    ``pydantic_model``; mutations that do not add a validation error are
    rolled back.

    Side effects: when a seed is in effect the global ``random`` module is
    re-seeded, and an explicit ``seed`` argument overwrites ``config.seed``.

    Args:
        data: Valid data structure (should be compatible with pydantic_model)
        pydantic_model: Pydantic model class to violate
        config: Error introduction configuration
        seed: Optional seed for backward compatibility

    Returns:
        Modified data with error(s), or None if no error could be introduced
    """
    # Handle backward compatibility
    if config is None:
        config = ErrorIntroductionConfig(seed=seed)
    elif seed is not None:
        config.seed = seed

    if config.seed is not None:
        random.seed(config.seed)

    # Check probability
    if random.random() > config.probability:
        return None

    # Validate input
    if not isinstance(data, dict):
        return None

    # Check if any error types are configured
    if not config.error_types:
        return None

    # Analyze the Pydantic model
    try:
        model_analysis = _analyze_pydantic_model(pydantic_model)
    except Exception:
        return None

    erroneous_data = copy.deepcopy(data)
    errors_introduced = 0
    # Number of validation errors currently present in erroneous_data. A new
    # mutation only counts when it raises this number; otherwise, once one
    # error exists, every later mutation would "verify" just because the data
    # is already invalid.
    known_error_count = 0
    max_attempts = 50  # Prevent infinite loops

    # Shuffle error types for variety
    available_error_types = config.error_types.copy()
    random.shuffle(available_error_types)

    for attempt in range(max_attempts):
        if errors_introduced >= config.max_errors:
            break

        error_type = available_error_types[attempt % len(available_error_types)]
        original_data = copy.deepcopy(erroneous_data)

        try:
            success = _apply_error_type(
                error_type, erroneous_data, model_analysis, pydantic_model
            )
            if not success:
                continue

            # Verify the error was actually introduced
            try:
                pydantic_model(**erroneous_data)
            except ValidationError as exc:
                error_count = len(exc.errors())
                if error_count > known_error_count:
                    # Error successfully introduced
                    known_error_count = error_count
                    errors_introduced += 1
                else:
                    # Mutation did not add a validation error of its own
                    erroneous_data = original_data
            else:
                # If validation passes, error wasn't introduced properly
                erroneous_data = original_data
        except Exception:
            # Best effort: restore data if error introduction failed
            erroneous_data = original_data
            continue

    # Return result if any errors were introduced
    if errors_introduced > 0 and erroneous_data != data:
        return erroneous_data

    return None


def _apply_error_type(
    error_type: Any,
    data: Dict[str, Any],
    model_analysis: Dict[str, Any],
    pydantic_model: Type[BaseModel],
) -> bool:
    """Run the introducer for ``error_type`` on ``data``; False if none applies."""
    if error_type == ErrorType.NESTED_ERROR:
        # Only the nested introducer takes the model class as well.
        return _introduce_nested_error(data, model_analysis, pydantic_model)

    introducers = {
        ErrorType.TYPE_ERROR: _introduce_type_error,
        ErrorType.CONSTRAINT_ERROR: _introduce_constraint_error,
        ErrorType.FORMAT_ERROR: _introduce_format_error,
        ErrorType.ENUM_ERROR: _introduce_enum_error,
        ErrorType.REQUIRED_FIELD_MISSING: _introduce_required_field_error,
        ErrorType.EXTRA_FIELD: _introduce_extra_field_error,
        ErrorType.LIST_ERROR: _introduce_list_error,
        ErrorType.VALIDATOR_ERROR: _introduce_validator_error,
    }
    introducer = introducers.get(error_type)
    if introducer is None:
        return False
    return introducer(data, model_analysis)


def _analyze_pydantic_model(model: Type[BaseModel]) -> Dict[str, Any]:
    """Analyze a Pydantic model to understand its structure and constraints.

    Returns:
        A dict with four keys: ``fields`` (per-field analysis from
        ``_analyze_field``), ``validators`` (the model's validator/decorator
        registry when present), ``nested_models`` (field name -> model class)
        and ``enums`` (field name -> list of enum values).
    """
    analysis = {"fields": {}, "validators": {}, "nested_models": {}, "enums": {}}

    model_fields = model.model_fields

    for field_name, field_info in model_fields.items():
        field_analysis = _analyze_field(field_name, field_info)
        analysis["fields"][field_name] = field_analysis

        # Track nested models
        if field_analysis.get("nested_model"):
            analysis["nested_models"][field_name] = field_analysis["nested_model"]

        # Track enums
        if field_analysis.get("enum_values"):
            analysis["enums"][field_name] = field_analysis["enum_values"]

    # Analyze custom validators (Pydantic v2)
    if hasattr(model, "__pydantic_validators__"):
        analysis["validators"] = model.__pydantic_validators__
    elif hasattr(model, "__pydantic_decorators__"):
        analysis["validators"] = model.__pydantic_decorators__

    # Mark fields that have validators
    decorators = getattr(model, "__pydantic_decorators__", None)
    field_validators = getattr(decorators, "field_validators", None) or {}
    for field_name in model_fields:
        field_analysis = analysis["fields"].get(field_name, {})
        for validator_info in field_validators.values():
            validated_fields = getattr(
                getattr(validator_info, "info", None), "fields", None
            )
            if validated_fields is not None and field_name in validated_fields:
                field_analysis["has_validators"] = True
                break
        analysis["fields"][field_name] = field_analysis

    return analysis


def _is_subclass(candidate: Any, parent: type) -> bool:
    """Return True when ``candidate`` is a class deriving from ``parent``."""
    try:
        return isinstance(candidate, type) and issubclass(candidate, parent)
    except TypeError:
        # Parametrized generics can pass the isinstance check on some Python
        # versions yet still be rejected by issubclass.
        return False


def _analyze_field(field_name: str, field_info: FieldInfo) -> Dict[str, Any]:
    """Analyze a single Pydantic field.

    Returns:
        A dict describing the field: ``name``, ``type`` (a short lowercase tag
        such as ``"str"``, ``"list"``, ``"enum"``, ``"nested_model"`` or
        ``"unknown"``), ``required``, ``default``, ``constraints``,
        ``has_validators``, ``nested_model``, ``enum_values``, ``is_list``,
        ``is_dict``, ``is_optional`` and, for parametrized lists,
        ``list_item_type``.
    """
    field_analysis = {
        "name": field_name,
        "type": "unknown",
        "required": field_info.is_required(),
        "default": field_info.default,
        "constraints": {},
        "has_validators": False,
        "nested_model": None,
        "enum_values": None,
        "is_list": False,
        "is_dict": False,
        "is_optional": False,
    }

    # Analyze the annotation
    annotation = field_info.annotation

    # Handle Union types (including Optional and ``X | None``)
    union_origin = get_origin(annotation)
    if union_origin is Union or (
        _PEP604_UNION is not None and union_origin is _PEP604_UNION
    ):
        args = get_args(annotation)
        if len(args) == 2 and type(None) in args:
            # This is Optional[T]
            field_analysis["is_optional"] = True
            # Get the non-None type
            annotation = next(arg for arg in args if arg is not type(None))

    origin = get_origin(annotation)

    # Handle List types (parametrized or bare ``list``)
    if origin is list or annotation is list:
        field_analysis["is_list"] = True
        field_analysis["type"] = "list"
        list_args = get_args(annotation)
        if list_args:
            field_analysis["list_item_type"] = list_args[0]

    # Handle Dict types (parametrized or bare ``dict``)
    elif origin is dict or annotation is dict:
        field_analysis["is_dict"] = True
        field_analysis["type"] = "dict"

    # Handle basic types
    elif annotation in (str, int, float, bool):
        field_analysis["type"] = annotation.__name__

    # Handle special Pydantic types
    elif hasattr(annotation, "__name__"):
        type_name = annotation.__name__
        if type_name in ["EmailStr", "HttpUrl", "UUID"]:
            field_analysis["type"] = type_name.lower()
        elif type_name == "datetime":
            field_analysis["type"] = "datetime"
        elif type_name == "date":
            field_analysis["type"] = "date"
        elif type_name == "Decimal":
            field_analysis["type"] = "decimal"

    # Handle Enums (issubclass also covers IntEnum/StrEnum-derived classes,
    # whose direct bases do not include Enum itself)
    if _is_subclass(annotation, Enum):
        field_analysis["type"] = "enum"
        field_analysis["enum_values"] = [member.value for member in annotation]

    # Handle nested Pydantic models
    if _is_subclass(annotation, BaseModel):
        field_analysis["type"] = "nested_model"
        field_analysis["nested_model"] = annotation

    constraints = field_analysis["constraints"]

    # Extract constraints from a ``constraints`` mapping when the field has one
    declared = getattr(field_info, "constraints", None)
    if declared:
        for constraint_name, constraint_value in declared.items():
            if constraint_value is not None:
                constraints[constraint_name] = constraint_value

    # Extract common Field constraints: prefer a direct attribute, otherwise
    # look through the metadata entries for the first one carrying a value.
    constraint_attrs = ["min_length", "max_length", "ge", "le", "gt", "lt", "pattern"]
    for attr in constraint_attrs:
        value = getattr(field_info, attr, None)
        if value is None:
            for metadata_item in getattr(field_info, "metadata", None) or ():
                value = getattr(metadata_item, attr, None)
                if value is not None:
                    break
        if value is not None:
            constraints[attr] = value

    return field_analysis


def _introduce_type_error(data: Dict[str, Any], model_analysis: Dict[str, Any]) -> bool:
    """Introduce type errors by changing value types.

    The first field (in model order) whose analyzed type and current value
    agree is replaced in ``data`` by a value of a different type.

    Returns:
        True if a value was replaced, False if no field qualified.
    """
    fields = model_analysis["fields"]

    for field_name, field_info in fields.items():
        if field_name not in data:
            continue

        current_value = data[field_name]
        field_type = field_info["type"]

        # Skip if optional and current value is None
        if field_info["is_optional"] and current_value is None:
            continue

        # Create type mismatches
        if field_type == "str" and isinstance(current_value, str):
            data[field_name] = random.choice([42, True, [], {}])
            return True
        elif field_type == "int" and isinstance(current_value, int):
            # No bool here: pydantic's default (lax) mode accepts True/False
            # for int fields, so it would not be an error.
            data[field_name] = random.choice(["not_a_number", {}, []])
            return True
        elif field_type == "bool" and isinstance(current_value, bool):
            data[field_name] = random.choice(["not_a_boolean", 42])
            return True
        elif field_type == "list" and isinstance(current_value, list):
            data[field_name] = "not_a_list"
            return True
        elif field_type == "dict" and isinstance(current_value, dict):
            data[field_name] = "not_a_dict"
            return True

    return False


def _introduce_constraint_error(
    data: Dict[str, Any], model_analysis: Dict[str, Any]
) -> bool:
    """Introduce constraint violation errors.

    Walks the fields in model order and violates the first applicable length
    or numeric bound. Lower bounds of 0 are skipped because they cannot be
    violated by shortening.

    Returns:
        True if a value in ``data`` was replaced, False otherwise.
    """
    fields = model_analysis["fields"]

    for field_name, field_info in fields.items():
        if field_name not in data:
            continue

        current_value = data[field_name]
        constraints = field_info["constraints"]

        if not constraints:
            continue

        # String length constraints
        if isinstance(current_value, str):
            min_length = constraints.get("min_length")
            max_length = constraints.get("max_length")
            if (
                min_length is not None
                and min_length > 0
                and len(current_value) >= min_length
            ):
                # Make it too short
                data[field_name] = "x" * (min_length - 1)
                return True
            if max_length is not None and len(current_value) <= max_length:
                # Make it too long
                data[field_name] = "x" * (max_length + 10)
                return True

        # Numeric constraints
        elif isinstance(current_value, (int, float)):
            if "ge" in constraints and current_value >= constraints["ge"]:
                # Make it too small
                data[field_name] = constraints["ge"] - 1
                return True
            elif "gt" in constraints and current_value > constraints["gt"]:
                # Make it too small (the bound itself is excluded)
                data[field_name] = constraints["gt"]
                return True
            elif "le" in constraints and current_value <= constraints["le"]:
                # Make it too large
                data[field_name] = constraints["le"] + 1
                return True
            elif "lt" in constraints and current_value < constraints["lt"]:
                # Make it too large (the bound itself is excluded)
                data[field_name] = constraints["lt"]
                return True

        # List constraints. The analyzer records length limits under
        # min_length/max_length; min_items/max_items are honoured when present.
        elif isinstance(current_value, list):
            max_items = constraints.get("max_items", constraints.get("max_length"))
            min_items = constraints.get("min_items", constraints.get("min_length"))
            if max_items is not None and len(current_value) <= max_items:
                # Add too many items
                extra_items = ["extra"] * (max_items + 5 - len(current_value))
                data[field_name] = current_value + extra_items
                return True
            if (
                min_items is not None
                and min_items > 0
                and len(current_value) >= min_items
            ):
                # Remove too many items
                data[field_name] = current_value[: min_items - 1]
                return True

    return False


def _mutate_string(s: str, mutation_rate: float = 0.1) -> str:
    """Apply character-level mutations to a string.

    Every character is independently selected with probability
    ``mutation_rate``; each selected character yields one single-edit variant
    of ``s`` (when the drawn edit applies). The variant for the earliest such
    position is returned, or ``s`` unchanged when there is none.
    """
    if not s:
        return s

    mutations = []
    for i, char in enumerate(s):
        if random.random() >= mutation_rate:
            continue

        mutation_type = random.choice(
            ["swap", "delete", "insert", "duplicate", "case"]
        )

        if mutation_type == "swap" and i < len(s) - 1:
            # Swap with next character
            mutations.append(s[:i] + s[i + 1] + s[i] + s[i + 2 :])
        elif mutation_type == "delete":
            # Delete character
            mutations.append(s[:i] + s[i + 1 :])
        elif mutation_type == "insert":
            # Insert random character
            random_char = random.choice(
                string.ascii_letters + string.digits + ".-_@"
            )
            mutations.append(s[:i] + random_char + s[i:])
        elif mutation_type == "duplicate":
            # Duplicate character
            mutations.append(s[:i] + char + char + s[i + 1 :])
        elif mutation_type == "case" and char.isalpha():
            # Change case
            mutations.append(s[:i] + char.swapcase() + s[i + 1 :])

    return mutations[0] if mutations else s


def _generate_invalid_email(valid_email: str = "") -> str:
    """Generate invalid email addresses dynamically.

    One strategy is picked at random. Some strategies derive the result from
    ``valid_email``; others return fixed malformed samples. The result is not
    checked here; callers are expected to validate it.
    """
    strategies = [
        # Character mutations
        lambda: _mutate_string(
            valid_email if valid_email and "@" in valid_email else "user@example.com"
        ),
        # Missing @ symbol
        lambda: (
            valid_email.replace("@", "") if "@" in valid_email else "userexample.com"
        ),
        # Multiple @ symbols
        lambda: (
            valid_email.replace("@", "@@")
            if "@" in valid_email
            else "user@@example.com"
        ),
        # Missing domain
        lambda: valid_email.split("@")[0] + "@" if "@" in valid_email else "user@",
        # Missing local part
        lambda: "@"
        + (valid_email.split("@")[1] if "@" in valid_email else "example.com"),
        # Invalid characters
        lambda: f"{random.choice(['user name', 'user..name', '.user', 'user.'])}@example.com",
        # Invalid domain
        lambda: f"user@{random.choice(['', '.com', 'example.', 'example..com', '123.456.789.0'])}",
        # Spaces in various positions
        lambda: f"{random.choice(['user ', ' user', 'us er'])}@{random.choice(['exam ple.com', 'example.com '])}",
        # Invalid TLD
        lambda: f"user@example.{random.choice(['', 'c', 'toolong', '123', 'com.', '.com'])}",
        # Special cases
        lambda: random.choice(
            [
                "user@[not.an.ip]",
                "user@localhost",
                "user name@example.com",
                "user@example@com",
                "user#example.com",
                "user@example,com",
                "(comment)user@example.com",
                "user@exam ple.com",
                "user@-example.com",
                "user@example-.com",
            ]
        ),
    ]

    return random.choice(strategies)()


def _generate_invalid_url(valid_url: str = "") -> str:
    """Generate invalid URLs dynamically.

    One strategy is picked at random. Some strategies derive the result from
    ``valid_url``; others return fixed malformed samples. The result is not
    checked here; callers are expected to validate it.
    """
    strategies = [
        # Character mutations
        lambda: _mutate_string(
            valid_url
            if valid_url.startswith(("http://", "https://"))
            else "https://example.com"
        ),
        # Missing protocol
        lambda: (
            valid_url.replace("http://", "").replace("https://", "")
            if valid_url
            else "www.example.com"
        ),
        # Invalid protocol
        lambda: (
            f"{random.choice(['htp://', 'htttp://', 'http:/', 'http:/'])}"
            f"{valid_url.split('://', 1)[1] if '://' in valid_url else 'example.com'}"
        ),
        # Missing domain
        lambda: random.choice(["http://", "https://", "http:///path"]),
        # Invalid port
        lambda: f"http://example.com:{random.choice(['', '99999', '-80', 'abc', '65536'])}",
        # Invalid characters in domain
        lambda: f"http://{random.choice(['exam ple', 'exam_ple', 'exam@ple', 'exam#ple', '-example', 'example-'])}.com",
        # Invalid TLD
        lambda: f"http://example.{random.choice(['', 'c', '123', 'verylongtld', '.com', 'com.'])}",
        # Spaces in URL
        lambda: f"http://{random.choice(['example .com', 'exam ple.com', 'example.com '])}/path",
        # Double slashes in wrong places
        lambda: "http:example.com" if not valid_url else valid_url.replace("://", ":"),
        # Special cases
        lambda: random.choice(
            [
                "javascript:alert('xss')",
                "file:///etc/passwd",
                "ftp://unsupported.com",
                "http://[not:valid:ipv6]",
                "http://300.300.300.300",
                "http://example..com",
                "http://.example.com",
                "http://example.com..",
                "http://exam ple.com",
                "http://example.com/path with spaces",
                "http://user:pass:word@example.com",
                "ht!tp://example.com",
            ]
        ),
    ]

    return random.choice(strategies)()


def _generate_invalid_uuid(valid_uuid: str = "") -> str:
    """Generate invalid UUIDs dynamically.

    One strategy is picked at random. Only the mutation strategy uses
    ``valid_uuid`` (when it is longer than 30 characters). The result is not
    checked here; callers are expected to validate it.
    """
    # Standard UUID format: 8-4-4-4-12 hexadecimal digits
    strategies = [
        # Character mutations
        lambda: _mutate_string(
            valid_uuid
            if len(valid_uuid) > 30
            else "550e8400-e29b-41d4-a716-446655440000"
        ),
        # Wrong length - too short
        lambda: "550e8400-e29b-41d4-a716",
        # Wrong length - too long
        lambda: "550e8400-e29b-41d4-a716-446655440000-extra",
        # Invalid characters (not hexadecimal)
        lambda: (
            f"{''.join(random.choices('ghijklmnopqrstuvwxyz!@#$%', k=8))}-"
            f"{''.join(random.choices('0123456789abcdef', k=4))}-"
            f"{''.join(random.choices('0123456789abcdef', k=4))}-"
            f"{''.join(random.choices('0123456789abcdef', k=4))}-"
            f"{''.join(random.choices('0123456789abcdef', k=12))}"
        ),
        # Wrong format - missing hyphens
        lambda: "550e8400e29b41d4a716446655440000",
        # Wrong format - extra hyphens
        lambda: "550e-8400-e29b-41d4-a716-4466-5544-0000",
        # Wrong format - wrong positions for hyphens
        lambda: "550e84-00e29b-41d4-a716446655440000",
        # Mixed case (some UUID validators are case-sensitive)
        lambda: "550E8400-E29B-41D4-A716-446655440000",
        # Partially valid
        lambda: f"550e8400-e29b-41d4-{random.choice(['xxxx', '12345', 'a71g'])}-446655440000",
        # Special cases
        lambda: random.choice(
            [
                "not-a-uuid",
                "00000000-0000-0000-0000-000000000000",  # Might be rejected as nil UUID
                "550e8400_e29b_41d4_a716_446655440000",  # Underscores instead of hyphens
                "{550e8400-e29b-41d4-a716-446655440000}",  # With braces
                "550e8400e29b41d4a716446655440000Z",  # Extra character
                "550e8400-e29b-11d4-a716-446655440000",  # Wrong version
                "g50e8400-e29b-41d4-a716-446655440000",  # Invalid hex at start
            ]
        ),
    ]

    return random.choice(strategies)()


def _introduce_format_error(
    data: Dict[str, Any], model_analysis: Dict[str, Any]
) -> bool:
    """Introduce format-specific errors for special types using dynamic generation.

    Handles, in priority order per field: analyzed email/URL/UUID types, a
    ``pattern`` constraint, then heuristics on common field names. Only string
    values are considered.

    Returns:
        True if a value in ``data`` was replaced, False otherwise.
    """
    fields = model_analysis["fields"]

    for field_name, field_info in fields.items():
        if field_name not in data:
            continue

        current_value = data[field_name]
        field_type = field_info["type"]

        if not isinstance(current_value, str):
            continue

        lowered_name = field_name.lower()

        if field_type == "emailstr":
            # Dynamically generate invalid emails
            data[field_name] = _generate_invalid_email(current_value)
            return True

        elif field_type == "httpurl":
            # Dynamically generate invalid URLs
            data[field_name] = _generate_invalid_url(current_value)
            return True

        elif field_type == "uuid":
            # Dynamically generate invalid UUIDs
            data[field_name] = _generate_invalid_uuid(current_value)
            return True

        # Pattern constraint violations
        elif "pattern" in field_info["constraints"]:
            pattern = field_info["constraints"]["pattern"]
            replacement = "INVALID_FORMAT_123!@#"
            # Unanchored search on both sides: the current value must satisfy
            # the pattern and the replacement must not, otherwise the swap
            # would not be a violation and the field is skipped.
            if re.search(pattern, current_value) and not re.search(
                pattern, replacement
            ):
                data[field_name] = replacement
                return True

        # Heuristic format errors for common field names
        elif lowered_name in [
            "email",
            "e_mail",
            "email_address",
            "contact_email",
            "user_email",
        ]:
            # Treat as email even if not EmailStr type
            if "@" in current_value and "." in current_value:
                data[field_name] = _generate_invalid_email(current_value)
                return True

        elif lowered_name in [
            "url",
            "website",
            "link",
            "uri",
            "homepage",
            "site_url",
        ]:
            # Treat as URL even if not HttpUrl type
            if current_value.startswith(("http://", "https://")):
                data[field_name] = _generate_invalid_url(current_value)
                return True

        elif lowered_name in [
            "uuid",
            "id",
            "guid",
            "uuid_field",
            "identifier",
            "unique_id",
        ]:
            # Treat as UUID-like field
            if len(current_value) > 30:  # Likely a UUID
                data[field_name] = _generate_invalid_uuid(current_value)
                return True

    return False


def _introduce_enum_error(data: Dict[str, Any], model_analysis: Dict[str, Any]) -> bool:
    """Introduce enum value errors with dynamic generation.

    For the first enum field whose current value is a valid enum value, a
    replacement that is not among the enum's values is written to ``data``.

    Returns:
        True if a value was replaced, False otherwise.
    """
    enums = model_analysis["enums"]

    for field_name, enum_values in enums.items():
        if field_name not in data:
            continue

        current_value = data[field_name]
        # Accept both raw values and Enum members in the input data
        raw_value = (
            current_value.value if isinstance(current_value, Enum) else current_value
        )
        if raw_value not in enum_values:
            continue

        # The variations below are string operations; stringify first so that
        # non-string enum values (e.g. ints) do not raise.
        base = raw_value if isinstance(raw_value, str) else str(raw_value)

        # Dynamically generate invalid enum values
        strategies = [
            # Case variations of valid values
            lambda: base.upper() if base.islower() else base.lower(),
            lambda: base.swapcase(),
            lambda: base.capitalize() if base.islower() else base,
            # Character mutations of valid values
            lambda: _mutate_string(base, mutation_rate=0.2),
            # Numeric variations
            lambda: str(random.randint(0, 999)),
            # Prefixed/suffixed variations
            lambda: random.choice(["INVALID_", "WRONG_", "BAD_"]) + base,
            lambda: base + random.choice(["_INVALID", "_WRONG", "2"]),
            # Common typos
            lambda: base[:-1] if len(base) > 1 else base + "x",
            lambda: base[1:] if len(base) > 1 else "x" + base,
            # Related but wrong values
            lambda: random.choice(
                [
                    "undefined",
                    "null",
                    "none",
                    "unknown",
                    "default",
                    "true",
                    "false",
                    "0",
                    "1",
                    "-1",
                    "yes",
                    "no",
                    "N/A",
                    "TBD",
                ]
            ),
            # Empty or None
            lambda: random.choice(["", None]),
        ]

        # Try to generate a value that's definitely not in the enum
        for _ in range(10):
            invalid_value = random.choice(strategies)()
            if invalid_value not in enum_values:
                data[field_name] = invalid_value
                return True

        # Fallback to a definitely invalid value
        data[field_name] = (
            f"DEFINITELY_NOT_A_VALID_ENUM_{random.randint(1000, 9999)}"
        )
        return True

    return False


def _introduce_required_field_error(
    data: Dict[str, Any], model_analysis: Dict[str, Any]
) -> bool:
    """Remove required fields.

    Deletes one randomly chosen required field that is present in ``data``.

    Returns:
        True if a field was removed, False if none was available.
    """
    fields = model_analysis["fields"]
    required_fields = [
        name
        for name, field_info in fields.items()
        if field_info["required"] and name in data
    ]

    if not required_fields:
        return False

    field_to_remove = random.choice(required_fields)
    del data[field_to_remove]
    return True


def _convert_case(field_name: str) -> str:
    """Convert between camelCase and snake_case.

    Names containing an underscore are converted to camelCase; all other names
    get an underscore inserted before each non-leading uppercase letter and
    are lowercased.
    """
    if "_" in field_name:
        # snake_case to camelCase
        parts = field_name.split("_")
        return parts[0] + "".join(word.capitalize() for word in parts[1:])

    # camelCase to snake_case (simple version)
    result = []
    for i, char in enumerate(field_name):
        if char.isupper() and i > 0:
            result.append("_")
        result.append(char.lower())
    return "".join(result)


def _introduce_extra_field_error(
    data: Dict[str, Any], model_analysis: Dict[str, Any]
) -> bool:
    """Add unexpected extra fields with context-aware generation.

    Adds one new key to ``data``. The key is derived from existing keys or
    picked from a list of common names, and is made unique with a numeric
    suffix if needed. ``model_analysis`` is not consulted.

    Returns:
        Always True.
    """
    # Analyze existing fields to generate plausible but wrong field names
    existing_fields = list(data.keys())

    strategies = [
        # Variations of existing fields
        lambda: (
            random.choice(existing_fields)
            + random.choice(["2", "_new", "_old", "_temp", "_backup"])
            if existing_fields
            else "extra_field"
        ),
        lambda: (
            random.choice(existing_fields)
            + random.choice(["_id", "_name", "_value", "_type"])
            if existing_fields
            else "extra_field"
        ),
        lambda: (
            random.choice(["temp_", "old_", "new_", "backup_"])
            + random.choice(existing_fields)
            if existing_fields
            else "extra_field"
        ),
        # Common typos of existing fields
        lambda: (
            _mutate_string(random.choice(existing_fields), mutation_rate=0.15)
            if existing_fields
            else "extra_field"
        ),
        # Commonly mistaken fields
        lambda: random.choice(
            [
                "id",
                "ID",
                "_id",
                "uid",
                "uuid",
                "created_at",
                "updated_at",
                "timestamp",
                "is_active",
                "active",
                "enabled",
                "status",
                "name",
                "title",
                "description",
                "value",
                "type",
                "kind",
                "category",
                "class",
                "data",
                "metadata",
                "extra",
                "additional_info",
                "user_id",
                "user",
                "owner",
                "author",
                "count",
                "total",
                "amount",
                "quantity",
                "_internal",
                "__private",
                "debug_info",
            ]
        ),
        # Underscored versions
        lambda: (
            "_" + random.choice(existing_fields)
            if existing_fields and not existing_fields[0].startswith("_")
            else "_extra_field"
        ),
        # Camelcase/snake_case conversions
        lambda: (
            _convert_case(random.choice(existing_fields))
            if existing_fields
            else "extraField"
        ),
    ]

    # Generate field name
    field_name = random.choice(strategies)()

    # Ensure uniqueness
    counter = 1
    original_name = field_name
    while field_name in data:
        field_name = f"{original_name}_{counter}"
        counter += 1

    # Generic values used when the field name does not suggest a type
    value_strategies = [
        lambda: "default_value",
        lambda: random.randint(0, 100),
        lambda: random.choice([True, False]),
        lambda: [],
        lambda: {},
        lambda: None,
        lambda: random.choice([0.0, 1.0, -1.0]),
        lambda: f"value_{random.randint(1, 1000)}",
        lambda: {"nested": "data"},
        lambda: [1, 2, 3],
    ]

    # If field name suggests a type, use appropriate value
    lowered_name = field_name.lower()
    if any(suffix in lowered_name for suffix in ["_id", "id", "_uid"]):
        data[field_name] = random.choice(
            [random.randint(1, 10000), f"id_{random.randint(1000, 9999)}"]
        )
    elif any(suffix in lowered_name for suffix in ["_at", "timestamp", "date"]):
        data[field_name] = random.choice(
            ["2024-01-01", "2024-01-01T00:00:00Z", 1704067200]
        )
    elif any(
        suffix in lowered_name for suffix in ["is_", "has_", "enabled", "active"]
    ):
        data[field_name] = random.choice([True, False, 1, 0, "true", "false"])
    else:
        data[field_name] = random.choice(value_strategies)()

    return True


def _introduce_nested_error(
    data: Dict[str, Any],
    model_analysis: Dict[str, Any],
    pydantic_model: Type[BaseModel],
) -> bool:
    """Introduce errors in nested objects.

    For the first nested-model field holding a dict, recursively calls
    ``introduce_error_for_pydantic`` on that dict with type, constraint and
    format errors enabled. ``pydantic_model`` is accepted for signature
    compatibility and is not used.

    Returns:
        True if a nested value was replaced, False otherwise.
    """
    nested_models = model_analysis["nested_models"]

    for field_name, nested_model_class in nested_models.items():
        if field_name not in data or not isinstance(data[field_name], dict):
            continue

        # Recursively introduce errors in nested objects
        nested_config = ErrorIntroductionConfig(
            error_types=[
                ErrorType.TYPE_ERROR,
                ErrorType.CONSTRAINT_ERROR,
                ErrorType.FORMAT_ERROR,
            ],
            max_errors=1,
        )

        erroneous_nested = introduce_error_for_pydantic(
            data[field_name], nested_model_class, nested_config
        )
        if erroneous_nested is not None:
            data[field_name] = erroneous_nested
            return True

    return False


def _introduce_list_error(data: Dict[str, Any], model_analysis: Dict[str, Any]) -> bool:
    """Introduce errors in list fields.

    For the first non-empty list field, applies one randomly chosen strategy
    (append a dict, append a string, prepend None, or change the length).

    Returns:
        True if a list in ``data`` was replaced, False otherwise.
    """
    fields = model_analysis["fields"]

    for field_name, field_info in fields.items():
        if not field_info["is_list"] or field_name not in data:
            continue

        current_list = data[field_name]
        if not isinstance(current_list, list) or not current_list:
            continue

        # Different list error strategies
        error_strategies = [
            lambda lst: lst + [{"invalid": "item"}],  # Add invalid item
            lambda lst: lst + ["wrong_type"],  # Add wrong type item
            lambda lst: [None] + lst,  # Add None item
            lambda lst: lst[:1] if len(lst) > 1 else lst + ["extra"],  # Wrong length
        ]

        strategy = random.choice(error_strategies)
        try:
            data[field_name] = strategy(current_list.copy())
            return True
        except Exception:
            continue

    return False


def _introduce_validator_error(
    data: Dict[str, Any], model_analysis: Dict[str, Any]
) -> bool:
    """Introduce errors that would trigger custom validators.

    For the first string-valued field marked ``has_validators``, writes an
    empty, whitespace-only, or whitespace-padded value to ``data``.

    Returns:
        True if a value was replaced, False otherwise.
    """
    fields = model_analysis["fields"]

    for field_name, field_info in fields.items():
        if not field_info["has_validators"] or field_name not in data:
            continue

        current_value = data[field_name]

        # Common validator violations
        if isinstance(current_value, str):
            validator_violating_values = [
                "",  # Empty string
                " ",  # Whitespace only
                "\n\t",  # Whitespace characters
                " " + current_value,  # Leading whitespace
                current_value + " ",  # Trailing whitespace
            ]
            data[field_name] = random.choice(validator_violating_values)
            return True

    return False


# Backward compatibility: ensure the old function signature still works
def introduce_error_for_pydantic_old(
    data: Any, pydantic_model: Type[BaseModel], seed: Optional[int] = None
) -> Optional[Any]:
    """Legacy function signature for backward compatibility."""
    return introduce_error_for_pydantic(data, pydantic_model, seed=seed)