import ctypes
import rigidity.errors
class Rule:
    '''
    Base class for all rules.

    The default implementation is a pass-through: `apply()` hands back
    whatever it was given, and both `read()` and `write()` delegate to
    `apply()`. Subclasses override `apply()` for symmetric behavior, or
    override `read()` and/or `write()` when the two directions differ.
    '''

    def apply(self, value):
        '''
        Validate and possibly modify a single value.

        Unless overridden, `read()` and `write()` both route through
        this method.

        :param value: the data to be validated.
        :returns: the validated and possibly modified value as
          documented by the rule; this base implementation returns
          `value` unchanged.
        :raises rigidity.errors.DropRow: a rule may raise this to
          cancel processing of an entire row. It signifies to the
          :class:`rigidity.Rigidity` class that it should discontinue
          processing the row.
        '''
        return value

    def read(self, value):
        '''
        Validate a value that is being read.

        Delegates to `apply()` by default; override it to make reading
        behave differently from writing.

        :param value: the data to be validated.
        :returns: the validated and possibly modified value as
          documented by the rule.
        :raises rigidity.errors.DropRow: a rule may raise this to
          cancel processing of an entire row. It signifies to the
          :class:`rigidity.Rigidity` class that it should discontinue
          processing the row.
        '''
        validated = self.apply(value)
        return validated

    def write(self, value):
        '''
        Validate a value that is being written.

        Delegates to `apply()` by default; override it to make writing
        behave differently from reading.

        :param value: the data to be validated.
        :returns: the validated and possibly modified value as
          documented by the rule.
        :raises rigidity.errors.DropRow: a rule may raise this to
          cancel processing of an entire row. It signifies to the
          :class:`rigidity.Rigidity` class that it should discontinue
          processing the row.
        '''
        validated = self.apply(value)
        return validated
class CapitalizeWords(Rule):
    '''
    Capitalize words in a string. By default, words are detected by
    searching for space, tab, new line, and carriage return characters.
    You may override this setting.

    Also, by default, the first character is capitalized automatically.
    '''

    #: Characters after which the following character is capitalized.
    SEPERATORS = ' \t\n\r'

    def __init__(self, seperators=SEPERATORS, cap_first=True):
        '''
        :param str seperators: capitalize any character following a
          character in this string.
        :param bool cap_first: automatically capitalize the first
          character in the string.
        '''
        self.seperators = seperators
        self.cap_first = cap_first

    def apply(self, value):
        '''
        Return `value` with the first character of each word
        upper-cased.

        :param str value: the text to capitalize.
        :returns: a new string; an empty input yields an empty string.
        '''
        # The result is assembled from a list of pieces rather than a
        # fixed-width character buffer: upper-casing one character can
        # yield several (for example 'ß' -> 'SS'), and a NUL-terminated
        # buffer would cut the text short at an embedded '\x00'.
        pieces = []
        capitalize_next = self.cap_first
        for char in value:
            piece = char.upper() if capitalize_next else char
            pieces.append(piece)
            # Decide using the character as it was emitted, so an
            # upper-cased character that is itself a separator still
            # triggers capitalization of the one that follows.
            capitalize_next = piece[-1] in self.seperators
        return ''.join(pieces)
class Cary(Rule):
    '''
    Cary values into subsequent rows lacking values in their column.

    The most recent non-empty value is remembered and returned in place
    of any later empty cell (``None`` or ``''``).
    '''

    #: When an empty cell is encountered and no previous fill value is
    #: available, throw an error.
    ACTION_ERROR = 1
    #: Until a value is encountered, use a default value to fill empty
    #: cells.
    ACTION_DEFAULT = 2
    #: When an empty cell is encountered and no other value is available
    #: to fill the cell, drop the row.
    ACTION_DROPROW = 3

    def __init__(self, action=ACTION_ERROR, default=None):
        '''
        :param action: take the behavior indicated by ACTION_ERROR,
          ACTION_DEFAULT, or ACTION_DROPROW.
        :param default: the fill value used for empty cells before any
          real value has been seen; only consulted when `action` is
          ACTION_DEFAULT.
        '''
        self.action = action
        self.previous_available = False
        self.previous = default
        # With ACTION_DEFAULT the default acts as an already-seen value.
        if action == self.ACTION_DEFAULT:
            self.previous_available = True

    def apply(self, value):
        '''
        Return `value`, or the last non-empty value when `value` is
        empty.

        :param value: the cell content; ``None`` and ``''`` count as
          empty.
        :returns: `value` itself when non-empty, otherwise the carried
          (or default) value.
        :raises ValueError: when the cell is empty, nothing is available
          to fill it, and the action is ACTION_ERROR or is not one of
          the recognised actions.
        :raises rigidity.errors.DropRow: when the cell is empty, nothing
          is available to fill it, and the action is ACTION_DROPROW.
        '''
        is_empty = value is None or value == ''
        if not is_empty:
            self.previous = value
            self.previous_available = True
            return value

        if self.previous_available:
            return self.previous
        if self.action == self.ACTION_ERROR:
            raise ValueError('Empty cell encountered before a value.')
        if self.action == self.ACTION_DROPROW:
            raise rigidity.errors.DropRow()
        # An unrecognised action must not let the empty cell slip
        # through silently.
        raise ValueError('Invalid action set')
class Boolean(Rule):
    '''
    Cast a string as a boolean value.

    The value is converted with `str()` and lower-cased before being
    compared against the accepted spellings.
    '''

    #: When invalid data is encountered, raise an exception.
    ACTION_ERROR = 1
    #: When invalid data is encountered, return a set default value.
    ACTION_DEFAULT = 2
    #: When invalid data is encountered, drop the row.
    ACTION_DROPROW = 3

    def __init__(self, allow_null=False, action=ACTION_ERROR, default=None):
        '''
        :param bool allow_null: when true, the spellings 'null', 'none'
          and the empty string are accepted and produce ``None``.
        :param action: take the behavior indicated by ACTION_ERROR,
          ACTION_DEFAULT, or ACTION_DROPROW.
        :param default: the value returned for invalid data when
          `action` is ACTION_DEFAULT.
        '''
        self.action = action
        self.default = default
        self.allow_null = allow_null

    def apply(self, value):
        '''
        Convert `value` to ``True``, ``False`` or (optionally) ``None``.

        :param value: the data to interpret.
        :returns: the boolean (or ``None``) the text stands for, or the
          configured default for invalid data under ACTION_DEFAULT.
        :raises ValueError: for invalid data under ACTION_ERROR or an
          unrecognised action.
        :raises rigidity.errors.DropRow: for invalid data under
          ACTION_DROPROW.
        '''
        text = str(value).lower()

        if text in ('true', 'yes', 't', '1'):
            return True
        if text in ('false', 'no', 'f', '0'):
            return False
        if self.allow_null and text in ('null', 'none', ''):
            return None

        # From here on the data is invalid; dispatch on the action.
        if self.action == self.ACTION_DEFAULT:
            return self.default
        if self.action == self.ACTION_DROPROW:
            raise rigidity.errors.DropRow()
        # ACTION_ERROR and any unrecognised action both end up here.
        raise ValueError('Value was not a boolean value')
class Bytes(Rule):
    '''
    Convert between text and bytes: values are encoded with the
    configured encoding when read and decoded with it when written.
    '''

    def __init__(self, encoding='utf8'):
        '''
        :param str encoding: name of the codec passed to `encode()` and
          `decode()`.
        '''
        self.encoding = encoding

    def read(self, value):
        '''
        :param value: the object to encode.
        :returns: the result of `value.encode()` with the configured
          encoding.
        '''
        codec = self.encoding
        return value.encode(codec)

    def write(self, value):
        '''
        :param value: the object to decode.
        :returns: the result of `value.decode()` with the configured
          encoding.
        '''
        codec = self.encoding
        return value.decode(codec)
class Contains(Rule):
    '''
    Check that a string field value contains the string (or all strings
    in a list of strings) passed as a parameter to this rule.
    '''

    def __init__(self, string):
        '''
        :param string: a single required substring, or a list or tuple
          of substrings that must all be present.
        :raises ValueError: when `string` is neither a string nor a
          list or tuple.
        '''
        if isinstance(string, str):
            self.strings = [string]
        elif isinstance(string, (list, tuple)):
            # Copy the sequence so that later changes to the caller's
            # list cannot silently alter what this rule checks for.
            self.strings = list(string)
        else:
            raise ValueError('string must be a string or a list')

    def apply(self, value):
        '''
        Verify that every required substring occurs in `value`.

        :param value: the field value to inspect.
        :returns: `value` unchanged when all substrings are present.
        :raises ValueError: naming the first required substring that is
          missing from `value`.
        '''
        for string in self.strings:
            if string not in value:
                raise ValueError('String "%s" not in value' % string)
        return value
class Integer(Rule):
    '''
    Cast all data to ints or die trying.
    '''

    #: When invalid data is encountered, raise an exception.
    ACTION_ERROR = 1
    #: When invalid data is encountered, return zero.
    ACTION_ZERO = 2
    #: When invalid data is encountered, drop the row.
    ACTION_DROPROW = 3

    def __init__(self, action=ACTION_ERROR):
        '''
        :param action: take the behavior indicated by ACTION_ERROR,
          ACTION_ZERO, or ACTION_DROPROW.
        '''
        self.action = action

    def apply(self, value):
        '''
        Convert `value` with `int()`.

        :param value: the data to convert.
        :returns: the integer, or ``0`` for invalid data under
          ACTION_ZERO.
        :raises ValueError, TypeError, OverflowError: the original
          conversion error, for invalid data under ACTION_ERROR or an
          unrecognised action.
        :raises rigidity.errors.DropRow: for invalid data under
          ACTION_DROPROW.
        '''
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            # int() reports unusable input in three ways: ValueError
            # for malformed text, TypeError for values such as None,
            # and OverflowError for an infinite float. All of them are
            # invalid data and must honour the configured action.
            if self.action == self.ACTION_ZERO:
                return 0
            if self.action == self.ACTION_DROPROW:
                raise rigidity.errors.DropRow()
            # ACTION_ERROR and unrecognised actions: re-raise the
            # conversion error with its traceback intact.
            raise
class Float(Rule):
    '''
    Cast all data to floats or die trying.
    '''

    #: When invalid data is encountered, raise an exception.
    ACTION_ERROR = 1
    #: When invalid data is encountered, return zero.
    ACTION_ZERO = 2
    #: When invalid data is encountered, drop the row.
    ACTION_DROPROW = 3

    def __init__(self, action=ACTION_ERROR):
        '''
        :param action: take the behavior indicated by ACTION_ERROR,
          ACTION_ZERO, or ACTION_DROPROW.
        '''
        self.action = action

    def apply(self, value):
        '''
        Convert `value` with `float()`.

        :param value: the data to convert.
        :returns: the float, or ``0.0`` for invalid data under
          ACTION_ZERO.
        :raises ValueError, TypeError, OverflowError: the original
          conversion error, for invalid data under ACTION_ERROR or an
          unrecognised action.
        :raises rigidity.errors.DropRow: for invalid data under
          ACTION_DROPROW.
        '''
        try:
            return float(value)
        except (TypeError, ValueError, OverflowError):
            # float() reports unusable input in three ways: ValueError
            # for malformed text, TypeError for values such as None,
            # and OverflowError for an int too large to represent. All
            # of them are invalid data and must honour the configured
            # action.
            if self.action == self.ACTION_ZERO:
                return 0.0
            if self.action == self.ACTION_DROPROW:
                raise rigidity.errors.DropRow()
            # ACTION_ERROR and unrecognised actions: re-raise the
            # conversion error with its traceback intact.
            raise
class NoneToEmptyString(Rule):
    '''
    Turn ``None`` into an empty string and leave every other value
    alone. This is useful in cases where legacy software uses None to
    create an empty cell, but your other checks require a string.
    '''

    def apply(self, value):
        '''
        :param value: the data to normalize.
        :returns: ``''`` when `value` is ``None``, otherwise `value`
          unchanged.
        '''
        return '' if value is None else value
class RemoveLinebreaks(Rule):
    '''
    Remove linebreaks from the start and end of field values. These can
    sometimes be introduced into files and create problems because they
    are invisible to human users.
    '''

    def apply(self, value):
        '''
        :param value: the field value to clean.
        :returns: `value` with any leading and trailing carriage-return
          and line-feed characters stripped.
        '''
        linebreak_chars = '\r\n'
        return value.strip(linebreak_chars)
class ReplaceValue(Rule):
    '''
    Check if the value has a specified replacement. If it does, replace
    it with that value. If it does not, take one of the following
    configurable actions: pass it through unmodified, drop the row,
    use a default value, return an empty string, or raise an error.
    '''

    #: When no replacement is found, drop the row.
    ACTION_DROPROW = 1
    #: When no replacement is found, return a set default value.
    ACTION_DEFAULT_VALUE = 2
    #: When no replacement is found, allow the original to pass through.
    ACTION_PASSTHROUGH = 3
    #: When no replacement is found, raise an exception.
    ACTION_ERROR = 4
    #: When no replacement is found, return an empty string.
    ACTION_BLANK = 5
    #: .. warning:: ACTION_DROP is deprecated due to the name being similar
    #:   to ACTION_DROPROW. Use ACTION_BLANK instead.
    ACTION_DROP = ACTION_BLANK  # Legacy support for v1.2.0; deprecated

    def __init__(self, replacements=None, missing_action=ACTION_ERROR,
                 default_value=''):
        '''
        :param dict replacements: a mapping between original values
          and replacement values. When omitted, an empty mapping is
          created for this instance.
        :param missing_action: when a replacement is not found for a
          value, take the behavior specified by the specified value,
          such as ACTION_DROPROW, ACTION_DEFAULT_VALUE,
          ACTION_PASSTHROUGH, ACTION_ERROR, or ACTION_BLANK.
        :param default_value: if ACTION_DEFAULT_VALUE is the missing
          replacement behavior, use this variable as the default
          replacement value.
        '''
        # A fresh dict per instance: a `{}` default in the signature
        # would be one shared object mutated by every rule using it.
        self.replacements = {} if replacements is None else replacements
        self.missing_action = missing_action
        self.default_value = default_value

        # ACTION_BLANK is ACTION_DEFAULT_VALUE with an empty-string
        # default, so it is normalized here once.
        if missing_action == self.ACTION_BLANK:
            self.missing_action = self.ACTION_DEFAULT_VALUE
            self.default_value = ''

    def apply(self, value):
        '''
        Look up `value` in the replacement mapping.

        :param value: the data to replace.
        :returns: the mapped replacement, or, when none exists, the
          original value (ACTION_PASSTHROUGH) or the default value
          (ACTION_DEFAULT_VALUE / ACTION_BLANK).
        :raises rigidity.errors.DropRow: when no replacement exists and
          the action is ACTION_DROPROW.
        :raises IndexError: when no replacement exists and the action
          is ACTION_ERROR or is not a recognised action.
        '''
        if value in self.replacements:
            return self.replacements[value]

        if self.missing_action == self.ACTION_DROPROW:
            raise rigidity.errors.DropRow()
        if self.missing_action == self.ACTION_PASSTHROUGH:
            return value
        if self.missing_action == self.ACTION_DEFAULT_VALUE:
            return self.default_value
        if self.missing_action == self.ACTION_ERROR:
            raise IndexError('No replacement for value')
        raise IndexError('No replacement for value; invalid default action')
class Static(Rule):
    '''
    Ignore the incoming field value and always produce the fixed value
    supplied when the rule was created.
    '''

    def __init__(self, value):
        '''
        :param value: the constant that `apply()` will return for every
          field.
        '''
        self.static_value = value

    def apply(self, value):
        '''
        :param value: the incoming data; it is not used.
        :returns: the static value given at initialization.
        '''
        constant = self.static_value
        return constant
class Unique(Rule):
    '''
    Only allow unique values to pass. When a repeated value is found,
    the row may be dropped or an error may be raised.
    '''

    #: When repeat data is encountered, raise an exception.
    ACTION_ERROR = 1
    #: When repeat data is encountered, drop the row.
    ACTION_DROPROW = 2

    def __init__(self, action=ACTION_ERROR):
        '''
        :param action: Accepts either ACTION_ERROR or ACTION_DROPROW as
          the behavior to be performed when a value is not unique.
        '''
        self.action = action
        #: Every accepted value, in the order it was first seen.
        self.encountered = []
        # Index of the hashable values for constant-time duplicate
        # checks; scanning the list for every value makes a whole file
        # quadratic in its row count.
        self._hashable_seen = set()
        # Unhashable values cannot live in a set and are scanned
        # linearly instead.
        self._unhashable_seen = []

    def _is_repeat(self, value):
        '''Return whether `value` equals a previously accepted value.'''
        try:
            return (value in self._hashable_seen
                    or value in self._unhashable_seen)
        except TypeError:
            # `value` is unhashable, so the set cannot be queried;
            # compare against everything seen so far.
            return value in self.encountered

    def _remember(self, value):
        '''Record `value` as seen.'''
        self.encountered.append(value)
        try:
            self._hashable_seen.add(value)
        except TypeError:
            self._unhashable_seen.append(value)

    def apply(self, value):
        '''
        Check that a value is unique.

        :param value: the data to check.
        :returns: `value` unchanged when it has not been seen before.
        :raises ValueError: when the value is not unique and either
          ACTION_ERROR is set or the action is not recognised.
        :raises rigidity.errors.DropRow: when ACTION_DROPROW is set and
          the value is not unique.
        '''
        if self._is_repeat(value):
            if self.action == self.ACTION_ERROR:
                raise ValueError('Value not unique')
            if self.action == self.ACTION_DROPROW:
                raise rigidity.errors.DropRow()
            raise ValueError('Invalid action set')

        self._remember(value)
        return value
class Drop(Rule):
    '''
    Discard whatever is in this column: every value is replaced with an
    empty string.
    '''

    def apply(self, value):
        '''
        :param value: the incoming data; it is not used.
        :returns: an empty string, always.
        '''
        blank = ''
        return blank
class Strip(Rule):
    '''
    Strip excess white space from the beginning and end of a value, or
    a custom set of characters when one is given.
    '''

    def __init__(self, chars=None):
        '''
        :param chars: the characters to strip. Any falsy value
          (including ``None`` and ``''``) selects the default behavior
          of `strip()` called with no arguments.
        '''
        # Stored as an argument list so apply() can splat it directly.
        self.strip_args = [chars] if chars else []

    def apply(self, value):
        '''
        :param value: the field value to trim.
        :returns: the result of `value.strip()` called with the
          configured arguments.
        '''
        arguments = self.strip_args
        return value.strip(*arguments)
class UpcA(Rule):
    '''
    Validate UPC-A barcode numbers to ensure that they are 12 digits.
    Strict validation of the check digit may also be enabled.
    '''

    def __init__(self, strict=False):
        '''
        :param bool strict: If true, raise a ValueError if the given
          UPC code fails the check digit validation.
        '''
        self.strict = strict

    def apply(self, value):
        '''
        Normalize a UPC-A code to a 12-character digit string.

        The value is cast to a string and must consist of digits only;
        shorter codes are zero-padded on the left up to 12 digits.

        :param value: the code to validate.
        :returns: the 12-digit code as a string.
        :raises ValueError: when the value is not numeric, when it is
          longer than 12 digits, or when strict mode is enabled and the
          check digit does not match.
        '''
        code = str(value)
        if not code.isdigit():
            raise ValueError('UPC-A code is not numeric.')

        # Spreadsheet software that treats the column as a number drops
        # leading zeros; restore them.
        code = code.zfill(12)
        if len(code) > 12:
            raise ValueError('UPC-A is longer than 12 digits')

        if self.strict:
            # Digits at the 1st, 3rd, ... 11th positions carry weight 3;
            # those at the 2nd, 4th, ... 10th positions carry weight 1.
            odd_total = sum(int(digit) for digit in code[0:11:2])
            even_total = sum(int(digit) for digit in code[1:11:2])
            expected = -(odd_total * 3 + even_total) % 10
            if int(code[-1]) != expected:
                raise ValueError('UPC-A check digit is incorrect')

        return code
class Lower(Rule):
    '''
    Lower-case a string value.
    '''

    def apply(self, value):
        '''
        :param value: the text to convert.
        :returns: the result of `value.lower()`.
        '''
        lowered = value.lower()
        return lowered
class Upper(Rule):
    '''
    Upper-case a string value.
    '''

    def apply(self, value):
        '''
        :param value: the text to convert.
        :returns: the result of `value.upper()`.
        '''
        uppered = value.upper()
        return uppered