register()
Register a new constraint condition type.
Syntax
@classmethod
def register(
cls,
name: str,
constraint_class: type
)
Parameters
name : str, required
- Constraint condition type name
- Required parameter
- Used to identify this constraint type in the configuration dictionary
constraint_class : type, required
- Class implementing the constraint condition
- Required parameter
- Must inherit from BaseConstrainer
Return Value
None
Description
register() is a class method that allows users to extend Constrainer functionality by registering custom constraint types.
Built-in Constraint Types
Constrainer has the following constraint types registered by default:
- nan_groups
- field_constraints
- field_combinations
- field_proportions
Custom Constraint Class Requirements
Custom constraint classes must:
- Inherit from the BaseConstrainer abstract base class
- Implement the validate_config(df: pd.DataFrame) -> bool method
- Implement the apply(df: pd.DataFrame) -> pd.DataFrame method
- Accept a configuration parameter in __init__
Basic Examples
Create Simple Custom Constraint
from petsard import Constrainer
from petsard.constrainer.constrainer_base import BaseConstrainer
import pandas as pd
class MinRowsConstrainer(BaseConstrainer):
    """Reject data that has fewer rows than a configured minimum."""

    def __init__(self, config: dict):
        """Read the 'min_rows' threshold from ``config`` (10 when absent)."""
        self.min_rows = config.get('min_rows', 10)

    def validate_config(self, df: pd.DataFrame) -> bool:
        """Return True only when the threshold is a positive integer."""
        threshold = self.min_rows
        if not isinstance(threshold, int):
            return False
        return threshold > 0

    def apply(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return ``df`` untouched, or raise ValueError when it is too short."""
        too_short = len(df) < self.min_rows
        if too_short:
            raise ValueError(f"Data row count {len(df)} less than minimum requirement {self.min_rows}")
        return df
# Register the custom constraint under the config key 'min_rows'
Constrainer.register('min_rows', MinRowsConstrainer)
# Use the custom constraint next to a built-in one; the 'min_rows' key
# selects the registered type and its value is that constraint's config
config = {
    'min_rows': {'min_rows': 50},
    'field_constraints': [
        "age >= 18"
    ]
}
constrainer = Constrainer(config)
result = constrainer.apply(df)
Create Field Range Constraint
from petsard import Constrainer
from petsard.constrainer.constrainer_base import BaseConstrainer
import pandas as pd
class FieldRangeConstrainer(BaseConstrainer):
    """Constrain field range based on percentiles."""

    def __init__(self, config: dict):
        """
        Store the per-field percentile settings.

        config: {
            'field_name': {
                'lower_percentile': 5,   # Remove values below 5%
                'upper_percentile': 95   # Remove values above 95%
            }
        }

        A missing 'lower_percentile' is treated as 0 and a missing
        'upper_percentile' as 100.
        """
        self.config = config

    def validate_config(self, df: pd.DataFrame) -> bool:
        """
        Validate configuration against ``df``.

        Returns False when a configured field is not a column of ``df``,
        when a percentile lies outside [0, 100], or when the lower
        percentile exceeds the upper one (which would otherwise make
        ``apply`` silently discard every row).
        """
        for field, params in self.config.items():
            if field not in df.columns:
                return False
            lower_p = params.get('lower_percentile', 0)
            upper_p = params.get('upper_percentile', 100)
            # One chained comparison covers both range checks and the ordering.
            if not (0 <= lower_p <= upper_p <= 100):
                return False
        return True

    def apply(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Apply percentile constraints and return a new, re-indexed frame.

        Fields are processed in config order on the progressively filtered
        data, so a later field's percentiles are computed from the rows
        that survived the earlier fields. Rows whose value is missing fail
        both comparisons and are dropped.
        """
        result = df.copy()
        for field, params in self.config.items():
            lower_p = params.get('lower_percentile', 0)
            upper_p = params.get('upper_percentile', 100)
            # quantile() expects a fraction in [0, 1], not a percentage.
            lower_val = result[field].quantile(lower_p / 100)
            upper_val = result[field].quantile(upper_p / 100)
            mask = (result[field] >= lower_val) & (result[field] <= upper_val)
            result = result[mask]
        return result.reset_index(drop=True)
# Register the constraint under the config key 'field_range'
Constrainer.register('field_range', FieldRangeConstrainer)
# Use: one entry per column, each with its own percentile band
config = {
    'field_range': {
        'salary': {
            'lower_percentile': 10,  # Remove lowest 10%
            'upper_percentile': 90  # Remove highest 10%
        },
        'age': {
            'lower_percentile': 5,
            'upper_percentile': 95
        }
    }
}
constrainer = Constrainer(config)
result = constrainer.apply(df)
Advanced Examples
Create Dependency Constraint
from petsard import Constrainer
from petsard.constrainer.constrainer_base import BaseConstrainer
import pandas as pd
class DependencyConstrainer(BaseConstrainer):
    """Constrain dependencies between fields."""

    # Supported 'then' conditions, mapped to the pandas Series method that
    # evaluates them. Shared by validate_config() and apply() so the two
    # cannot drift apart.
    _NULL_CHECKS = {
        'IS NOT pd.NA': 'notna',
        'IS pd.NA': 'isna',
    }

    def __init__(self, config: list):
        """
        Store the list of "if ... then ..." rules.

        config: [
            {
                'if': {'field': 'status', 'value': 'active'},
                'then': {'field': 'last_login', 'condition': 'IS NOT pd.NA'}
            }
        ]
        """
        self.rules = config

    def validate_config(self, df: pd.DataFrame) -> bool:
        """
        Validate configuration against ``df``.

        Returns False when a rule references a column missing from ``df``
        or uses a 'then' condition that ``apply`` does not implement.
        Without the second check a mistyped condition would pass
        validation and then be silently ignored.
        """
        for rule in self.rules:
            if_field = rule['if']['field']
            then_field = rule['then']['field']
            if if_field not in df.columns or then_field not in df.columns:
                return False
            if rule['then']['condition'] not in self._NULL_CHECKS:
                return False
        return True

    def apply(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Apply dependency constraints and return a new, re-indexed frame.

        For each rule, rows where the 'if' field equals the given value
        are kept only when the 'then' field satisfies the condition; all
        other rows are kept as they are. Rules with an unsupported
        condition are skipped.
        """
        result = df.copy()
        for rule in self.rules:
            if_field = rule['if']['field']
            if_value = rule['if']['value']
            then_field = rule['then']['field']
            then_condition = rule['then']['condition']

            check = self._NULL_CHECKS.get(then_condition)
            if check is None:
                # Can extend other conditions
                continue

            # Create condition masks
            if_mask = result[if_field] == if_value
            then_mask = getattr(result[then_field], check)()

            # Retain rows satisfying "if...then..." rules
            result = result[~if_mask | (if_mask & then_mask)]
        return result.reset_index(drop=True)
# Register the constraint under the config key 'dependency'
Constrainer.register('dependency', DependencyConstrainer)
# Use: each rule pairs an 'if' match with a 'then' requirement
config = {
    'dependency': [
        {
            'if': {'field': 'employed', 'value': 'yes'},
            'then': {'field': 'salary', 'condition': 'IS NOT pd.NA'}
        },
        {
            'if': {'field': 'has_children', 'value': 'yes'},
            'then': {'field': 'num_children', 'condition': 'IS NOT pd.NA'}
        }
    ]
}
constrainer = Constrainer(config)
result = constrainer.apply(df)
Create Statistical Distribution Constraint
from petsard import Constrainer
from petsard.constrainer.constrainer_base import BaseConstrainer
import pandas as pd
import numpy as np
class OutlierConstrainer(BaseConstrainer):
    """Remove statistical outliers."""

    def __init__(self, config: dict):
        """
        Store the per-field outlier settings.

        config: {
            'field_name': {
                'method': 'iqr',     # or 'zscore'
                'threshold': 1.5     # IQR multiple or Z-score threshold
            }
        }

        A missing 'method' is treated as 'iqr' and a missing 'threshold'
        as 1.5.
        """
        self.config = config

    def validate_config(self, df: pd.DataFrame) -> bool:
        """Return True when every configured field is a column of ``df``."""
        for field in self.config:
            if field not in df.columns:
                return False
        return True

    def apply(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Remove outliers and return a new, re-indexed frame.

        Fields are processed in config order on the progressively filtered
        data. Rows whose value is missing are dropped by either method.
        Fields with an unrecognised method are left unfiltered.
        """
        result = df.copy()
        for field, params in self.config.items():
            method = params.get('method', 'iqr')
            threshold = params.get('threshold', 1.5)
            column = result[field]

            if method == 'iqr':
                q1 = column.quantile(0.25)
                q3 = column.quantile(0.75)
                iqr = q3 - q1
                lower_bound = q1 - threshold * iqr
                upper_bound = q3 + threshold * iqr
                mask = (column >= lower_bound) & (column <= upper_bound)
            elif method == 'zscore':
                spread = column.std()
                if pd.isna(spread) or spread == 0:
                    # A constant column (std 0) or fewer than two values
                    # (std NaN) has no defined z-score; dividing would give
                    # an all-NaN mask and wipe out every row. Nothing can be
                    # an outlier here, so only missing values are dropped,
                    # matching what the regular path does with them.
                    mask = column.notna()
                else:
                    z_scores = np.abs((column - column.mean()) / spread)
                    mask = z_scores < threshold
            else:
                continue

            result = result[mask]
        return result.reset_index(drop=True)
# Register the constraint under the config key 'outlier'
Constrainer.register('outlier', OutlierConstrainer)
# Use: each column picks its own detection method and threshold
config = {
    'outlier': {
        'salary': {
            'method': 'iqr',
            'threshold': 1.5
        },
        'age': {
            'method': 'zscore',
            'threshold': 3
        }
    }
}
constrainer = Constrainer(config)
result = constrainer.apply(df)
Combine Built-in and Custom Constraints
from petsard import Constrainer
from petsard.constrainer.constrainer_base import BaseConstrainer
import pandas as pd
# Define custom constraint class
class UniqueValueConstrainer(BaseConstrainer):
    """Keep a single row per distinct value in each listed field."""

    def __init__(self, config: list):
        """Remember the list of field names that must hold unique values."""
        self.unique_fields = config

    def validate_config(self, df: pd.DataFrame) -> bool:
        """Return True when every listed field is a column of ``df``."""
        return all(name in df.columns for name in self.unique_fields)

    def apply(self, df: pd.DataFrame) -> pd.DataFrame:
        """Drop duplicates field by field and return a re-indexed copy."""
        deduped = df.copy()
        for name in self.unique_fields:
            # Each pass works on the rows left over from the previous field.
            deduped = deduped.drop_duplicates(subset=[name])
        deduped = deduped.reset_index(drop=True)
        return deduped
# Register the custom constraint under the config key 'unique_values'
Constrainer.register('unique_values', UniqueValueConstrainer)
# Combine built-in and custom constraints in a single config dictionary
config = {
    # Built-in constraints
    'nan_groups': {
        'name': 'delete'
    },
    'field_constraints': [
        "age >= 18 & age <= 65"
    ],
    'field_combinations': [
        (
            {'education': 'salary'},
            {'PhD': [70000, 80000, 90000]}
        )
    ],
    # Custom constraints
    'unique_values': ['email', 'id']
}
constrainer = Constrainer(config)
result = constrainer.apply(df)
Important Notes
- Class Method: register() is a class method (@classmethod), can be called without instance
- Inheritance Requirement: Custom classes must inherit from BaseConstrainer; otherwise a ValueError is raised
- Global Registration: Registered constraint types are globally available, affecting all Constrainer instances
- Override Built-in: Can register same name to override built-in constraint types (not recommended)
- Execution Order: Custom constraints execute in the order they appear in config
- Error Handling: Recommend adding comprehensive error handling and validation in custom constraints
- Performance Considerations: Complex custom constraints may impact overall performance
- Testing Recommendation: Thoroughly test custom constraint correctness before actual use
- Documentation: Write clear docstrings for custom constraint classes
Related Methods
- __init__(): Initialize constraint configuration
- apply(): Apply constraint conditions
- resample_until_satisfy(): Resample repeatedly until constraints satisfied