Coverage for portality / lib / dataobj.py: 67%
822 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-05 00:09 +0100
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-05 00:09 +0100
1# -*- coding: UTF-8 -*-
3from portality.lib import dates
4from portality.datasets import get_country_code, get_currency_code
5from copy import deepcopy
6import locale, json, warnings
7from urllib.parse import urlparse
8from datetime import date, datetime
10from portality.lib.dates import FMT_DATETIME_MS_STD, FMT_DATE_STD
13#########################################################
14## Data coerce functions
def to_currency_code(val):
    """
    Coerce a value to a currency code, as resolved by ``get_currency_code``.

    :param val: the value to look up; ``None`` is passed straight through
    :return: the currency code as a unicode string, or ``None`` if ``val`` was ``None``
    :raises ValueError: if ``get_currency_code`` returns ``None`` for the value
    """
    if val is None:
        return None

    code = get_currency_code(val)
    if code is not None:
        # normalise whatever the lookup returned to a unicode string
        return to_unicode()(code)

    raise ValueError("Unable to convert {x} to a valid currency code".format(x=val))
def to_country_code(val):
    """
    Coerce a value to a country code, as resolved by ``get_country_code``.

    :param val: the value to look up; ``None`` is passed straight through
    :return: the country code as a unicode string, or ``None`` if ``val`` was ``None``
    :raises ValueError: if ``get_country_code`` returns ``None`` for the value
    """
    if val is None:
        return None

    code = get_country_code(val, fail_if_not_found=True)
    if code is not None:
        # normalise whatever the lookup returned to a unicode string
        return to_unicode()(code)

    raise ValueError("Unable to convert {x} to a valid country code".format(x=val))
def to_unicode():
    """
    Build a coerce function which converts a value to a unicode string.

    The returned function behaves as follows:
      * ``str`` values are returned unchanged
      * ``bytes`` / ``bytearray`` values are decoded strictly as UTF-8
      * anything else is converted with ``str()``

    :return: a function taking one value and returning a ``str``; it raises
        ``ValueError`` if a byte string is not valid UTF-8
    """
    def to_utf8_unicode(val):
        if isinstance(val, str):
            return val

        # byte strings must be decoded rather than passed to str(), which would
        # produce the "b'...'" repr instead of the text
        if isinstance(val, (bytes, bytearray)):
            try:
                return val.decode("utf8", "strict")
            except UnicodeDecodeError as e:
                raise ValueError("Could not decode string") from e

        return str(val)

    return to_utf8_unicode
def to_unicode_upper(val):
    """
    Coerce a value to a unicode string and upper-case it.

    :param val: the value to convert
    :return: the upper-cased unicode string
    """
    as_text = to_unicode()(val)
    return as_text.upper()
def to_unicode_lower(val):
    """
    Coerce a value to a unicode string and lower-case it.

    :param val: the value to convert
    :return: the lower-cased unicode string
    """
    as_text = to_unicode()(val)
    return as_text.lower()
def to_int():
    """
    Build a coerce function which converts a value to an ``int``.

    For string input the returned function tries, in order: a straight ``int()``
    cast, a cast with commas removed, and finally ``locale.atoi``.  Non-string
    input only gets the straight cast.

    :return: a function taking one value and returning an ``int``; it raises
        ``ValueError`` if none of the strategies succeed
    """
    def intify(val):
        # strip any characters that are outside the ascii range - they won't make up the int anyway
        # and this will get rid of things like strange currency marks.  Decode back to str so that
        # the string-based fallbacks below keep working.
        if isinstance(val, str):
            val = val.encode("ascii", errors="ignore").decode("ascii")

        # try the straight cast
        try:
            return int(val)
        except ValueError:
            pass

        if isinstance(val, str):
            # could have commas in it, so try stripping them
            try:
                return int(val.replace(",", ""))
            except ValueError:
                pass

            # try the locale-specific approach
            try:
                return locale.atoi(val)
            except ValueError:
                pass

        raise ValueError("Could not convert string to int: {x}".format(x=val))

    return intify
def to_float():
    """
    Build a coerce function which converts a value to a ``float``.

    For string input the returned function tries, in order: a straight ``float()``
    cast, a cast with commas removed, and finally ``locale.atof``.  Non-string
    input only gets the straight cast.

    :return: a function taking one value and returning a ``float``; it raises
        ``ValueError`` if none of the strategies succeed
    """
    def floatify(val):
        # strip any characters that are outside the ascii range - they won't make up the float anyway
        # and this will get rid of things like strange currency marks.  Decode back to str so that
        # the string-based fallbacks below keep working.
        if isinstance(val, str):
            val = val.encode("ascii", errors="ignore").decode("ascii")

        # try the straight cast
        try:
            return float(val)
        except ValueError:
            pass

        if isinstance(val, str):
            # could have commas in it, so try stripping them
            try:
                return float(val.replace(",", ""))
            except ValueError:
                pass

            # try the locale-specific approach
            try:
                return locale.atof(val)
            except ValueError:
                pass

        raise ValueError("Could not convert string to float: {x}".format(x=val))

    return floatify
def date_str(in_format=None, out_format=None):
    """
    Build a coerce function which renders dates as strings.

    :param in_format: format passed to ``dates.reformat`` when the input is not a date object
    :param out_format: format passed to ``dates.format`` / ``dates.reformat`` for the output
    :return: a function which returns ``None`` for ``None`` or the empty string,
        formats ``date``/``datetime`` objects with ``dates.format``, and hands
        anything else to ``dates.reformat``
    """
    def datify(val):
        if val is None or val == "":
            return None

        # datetime is a subclass of date, the tuple just keeps both spelled out
        if isinstance(val, (date, datetime)):
            return dates.format(val, format=out_format)

        return dates.reformat(val, in_format=in_format, out_format=out_format)

    return datify
def to_datestamp(in_format=None):
    """
    Build a coerce function which parses a value with ``dates.parse``.

    :param in_format: passed to ``dates.parse`` as its ``format`` argument
    :return: a function taking one value and returning the result of ``dates.parse``
    """
    def stampify(val):
        parsed = dates.parse(val, format=in_format)
        return parsed

    return stampify
def to_isolang(output_format=None):
    """
    Build a coerce function which maps a language value to one of its ISO forms.

    :param output_format: format from input source to output.  Must be one of:
        * alpha3
        * alt3
        * alpha2
        * name
        * fr
        Can be a list in order of preference, too.  Defaults to ``["alpha3"]``.
        fixme: we could make these pycountry's keys, removing the need for so many transformations and intermediate steps
    :return: a function which returns ``None`` for ``None``, raises ``ValueError``
        when the language cannot be found in the dataset, and otherwise returns
        the first non-empty value among the requested formats (``None`` if all are empty)
    """
    # delayed import, since we may not always want to load the whole dataset for a dataobj
    from portality.lib import isolang as dataset

    # sort out the output format list
    if output_format is None:
        output_format = ["alpha3"]
    elif not isinstance(output_format, list):
        output_format = [output_format]

    def isolang(val):
        if val is None:
            return None

        record = dataset.find(val)
        if record is None:
            raise ValueError("Unable to find iso code for language {x}".format(x=val))

        # walk the preferred formats and hand back the first one with a usable value
        for fmt in output_format:
            candidate = record.get(fmt)
            if candidate is not None and candidate != "":
                return candidate

        return None

    return isolang
def to_url(val):
    """
    Coerce a string to a URL, requiring a scheme which starts with "http".

    :param val: the candidate URL; surrounding whitespace is stripped
    :return: the stripped URL as a unicode string; an empty (or whitespace-only)
        string is returned as the empty string
    :raises ValueError: if ``val`` is not a string, or its scheme does not start with "http"
    """
    if not isinstance(val, str):
        raise ValueError("Argument passed to to_url was not a string, but type '{t}': '{val}'".format(t=type(val),val=val))

    val = val.strip()
    if val == '':
        return val

    # parse with urlparse, and check the url has the minimum properties that we require
    scheme = urlparse(val).scheme
    if not (scheme and scheme.startswith("http")):
        raise ValueError("Could not convert string {val} to viable URL".format(val=val))

    return to_unicode()(val)
def to_bool(val):
    """
    Conservative boolean cast - don't cast lists and objects to True, just existing booleans and strings.

    :param val: ``None``, a boolean, or the string 'true'/'false' in any case
    :return: ``None`` for ``None``, otherwise the corresponding boolean
    :raises ValueError: for any other string, or any value of another type
    """
    if val is None:
        return None

    # identity checks, so that truthy/falsy non-booleans (e.g. 1 and 0) are not accepted
    if val is True or val is False:
        return val

    if not isinstance(val, str):
        raise ValueError("Could not convert {val} to boolean. Expect either boolean or string.".format(val=val))

    lowered = val.lower()
    if lowered == 'true':
        return True
    if lowered == 'false':
        return False

    raise ValueError("Could not convert string {val} to boolean. Expecting string to either say 'true' or 'false' (not case-sensitive).".format(val=val))
def string_canonicalise(canon, allow_fail=False):
    """
    Build a coerce function which maps strings onto a list of canonical forms.

    Matching ignores case and surrounding whitespace.

    :param canon: iterable of the canonical strings
    :param allow_fail: if True, ``None`` and unrecognised strings are passed
        through (the latter as unicode) rather than raising
    :return: a function taking one value and returning the canonical form; it
        raises ``ValueError`` for ``None`` or unrecognised values when
        ``allow_fail`` is False, and for values which do not behave as strings
    """
    normalised = {a.strip().lower(): a for a in canon}

    def sn(val):
        if val is None:
            if allow_fail:
                return None
            raise ValueError("NoneType not permitted")

        # only the errors a non-string value can produce are translated; anything
        # else (e.g. KeyboardInterrupt) is left to propagate
        try:
            norm = val.strip().lower()
        except (AttributeError, TypeError) as e:
            raise ValueError("Unable to treat value as a string") from e

        uc = to_unicode()
        if norm in normalised:
            return uc(normalised[norm])
        if allow_fail:
            return uc(val)

        raise ValueError("Unable to canonicalise string")

    return sn
230############################################################
232############################################################
233# The core data object which manages all the interactions
234# with the underlying data member variable
class DataObjException(Exception):
    """
    Base exception for this module.

    Exposes the first positional argument (or the empty string if there is
    none) as ``self.message``.
    """
    def __init__(self, *args, **kwargs):
        self.message = args[0] if len(args) > 0 else ''
        super(DataObjException, self).__init__(*args, **kwargs)
class DataSchemaException(DataObjException):
    """
    Raised by the DataObj get/set/coerce helpers when a value does not satisfy
    the constraints being applied to it.
    """
class DataObj(object):
    """
    Class which provides services to other classes which store their internal data
    as a python data structure in the self.data field.

    Attribute access which is not satisfied by the class itself is routed through
    ``__getattr__``/``__setattr__`` to the configured ``_properties`` and, when
    ``_expose_data`` is set, to the keys of the underlying data.
    """
    # NOTE: this statement is part of the class body, so the warning is issued when the class is defined
    warnings.warn("DEPRECATED - use `SeamlessMixin` instead.", DeprecationWarning)

    SCHEMA = None

    DEFAULT_COERCE = {
        # NOTE - if you add something to the default coerce, add it to the default swagger
        # translation dict below as well. Furthermore if you're adding
        # custom stuff to the coerce, you will likely need to add an entry
        # to the swagger translation table as well, in the same way you
        # extend the coerce map.
        "unicode": to_unicode(),
        "unicode_upper" : to_unicode_upper,
        "unicode_lower": to_unicode_lower,
        "utcdatetime": date_str(),
        "utcdatetimemicros" : date_str(out_format=FMT_DATETIME_MS_STD),
        "bigenddate" : date_str(out_format=FMT_DATE_STD),
        "integer": to_int(),
        "float": to_float(),
        "isolang": to_isolang(),
        "url": to_url,
        "bool": to_bool,
        "isolang_2letter": to_isolang(output_format="alpha2"),
        "country_code": to_country_code,
        "currency_code": to_currency_code,
        "license": string_canonicalise(["CC BY", "CC BY-NC", "CC BY-NC-ND", "CC BY-NC-SA", "CC BY-ND", "CC BY-SA", "Not CC-like"], allow_fail=True),
        "persistent_identifier_scheme": string_canonicalise(["None", "DOI", "Handles", "ARK"], allow_fail=True),
        "format": string_canonicalise(["PDF", "HTML", "ePUB", "XML"], allow_fail=True),
        "deposit_policy": string_canonicalise(["None", "Open Policy Finder", "Dulcinea", "OAKlist", "Diadorim", "Mir@bel"], allow_fail=True),
    }

    def __init__(self, raw=None, struct=None, construct_raw=True, expose_data=False, properties=None, coerce_map=None, construct_silent_prune=False, construct_maintain_reference=False, *args, **kwargs):
        """
        Initialise the object.  Each internal attribute is only assigned if it has
        not already been set (e.g. by a subclass before calling this constructor).

        :param raw: the initial data; defaults to an empty dict
        :param struct: the struct describing the data, if any
        :param construct_raw: whether to run ``construct`` over ``raw`` when a struct is available
        :param expose_data: whether keys of the data are available as attributes
        :param properties: mapping of public property name to (path, wrapper) tuples
        :param coerce_map: coerce functions by name; defaults to a deep copy of DEFAULT_COERCE
        :param construct_silent_prune: passed to ``construct`` as ``silent_prune``
        :param construct_maintain_reference: passed to ``construct`` as ``maintain_reference``
        """
        # make a shortcut to the object.__getattribute__ function
        og = object.__getattribute__

        # if no subclass has set the coerce, then set it from default
        try:
            og(self, "_coerce_map")
        except AttributeError:
            self._coerce_map = coerce_map if coerce_map is not None else deepcopy(self.DEFAULT_COERCE)

        # if no subclass has set the struct, initialise it
        try:
            og(self, "_struct")
        except AttributeError:
            self._struct = struct

        # assign the data if not already assigned by subclass
        # NOTE: data is not _data deliberately
        try:
            og(self, "data")
        except AttributeError:
            self.data = {} if raw is None else raw

        # properties to allow automatic object API construction
        # of the form
        #
        # {"<public property name>" : ("<path.to.property>", "<data object wrapper>")
        # e.g
        # {"identifier" : ("bibjson.identifier", DataObj))}
        try:
            og(self, "_properties")
        except AttributeError:
            self._properties = properties if properties is not None else {}

        # if no subclass has set expose_data, set it
        try:
            og(self, "_expose_data")
        except AttributeError:
            self._expose_data = expose_data

        # if no subclass has set _construct_silent_prune, set it
        try:
            og(self, "_construct_silent_prune")
        except AttributeError:
            self._construct_silent_prune = construct_silent_prune

        # if no subclass has set _construct_maintain_reference, set it
        try:
            og(self, "_construct_maintain_reference")
        except AttributeError:
            self._construct_maintain_reference = construct_maintain_reference

        # restructure the object based on the struct if required
        if self._struct is not None and raw is not None and construct_raw:
            self.data = construct(self.data, self._struct, self._coerce_map, silent_prune=construct_silent_prune, maintain_reference=construct_maintain_reference)

        # run against the old validation routine
        # (now deprecated)
        self.validate()

        # run the object's native validation routine
        self.custom_validate()

        # keep a reference to the current data record, in case something up the inheritance chain messes with it
        # (I'm looking at you, UserDict).
        remember_this = self.data

        # finally, kick the request up
        super(DataObj, self).__init__(*args, **kwargs)
        self.data = remember_this

    def __getattr__(self, name):
        """
        Resolve attributes which normal lookup did not find, via the dynamic
        properties and (if exposed) the data keys.

        :raises AttributeError: if the name is not a dynamic property, or has no value
        """
        # workaround to prevent debugger from disconnecting at the deepcopy method
        # https://stackoverflow.com/questions/32831050/pycharms-debugger-gives-up-when-hitting-copy-deepcopy
        # if name.startwith("__"):
        #     raise AttributeError

        if hasattr(self.__class__, name):
            return object.__getattribute__(self, name)

        props, data_attrs = self._list_dynamic_properties()

        # if the name is not in the dynamic properties, raise an attribute error
        if name not in props and name not in data_attrs:
            raise AttributeError('{name} is not set'.format(name=name))

        # otherwise, extract the path from the properties list or the internal data
        if name in props:
            path, wrapper = self._properties.get(name)
        else:
            path = name
            wrapper = DataObj

        # request the internal property directly (which will in-turn raise the AttributeError if necessary)
        try:
            return self._get_internal_property(path, wrapper)
        except AttributeError:
            # re-wrap the attribute error with the name, rather than the path
            raise AttributeError('{name} is not set'.format(name=name))

    def __setattr__(self, key, value):
        """
        Set an attribute, routing dynamic properties and exposed data keys into
        the internal data, and everything else onto the object itself.
        """
        # first set the attribute on any explicitly defined property
        try:
            if hasattr(self.__class__, key):
                # att = object.__getattribute__(self, key)
                return object.__setattr__(self, key, value)
        except AttributeError:
            pass

        # this could be an internal attribute from the constructor, so we need to make
        # a special case
        if key in ["_coerce_map", "_struct", "data", "_properties", "_expose_data",
                   "_construct_silent_prune", "_construct_maintain_reference"]:
            return object.__setattr__(self, key, value)

        props, data_attrs = self._list_dynamic_properties()

        # extract the path from the properties list or the internal data
        path = None
        wrapper = None
        if key in props:
            path, wrapper = self._properties.get(key)
        elif key in data_attrs:
            path = key
            wrapper = DataObj

        # try to set the property on the internal object
        if path is not None:
            wasset = self._set_internal_property(path, value, wrapper)
            if wasset:
                return

        # fall back to the default approach of allowing any attribute to be set on the object
        return object.__setattr__(self, key, value)

    def check_construct(self):
        """
        Apply the construct to the internal data and throw errors if it is not validated

        This could be used, for example, if external processes have violated the .data encapsulation, or
        if internal processes which change .data need to be checked to make sure they haven't strayed outside
        their remit

        :return:
        """
        if self._struct is not None and self.data is not None:
            construct(self.data, self._struct, self._coerce_map, silent_prune=False, maintain_reference=False)

    def validate(self):
        """
        DEPRECATED - use 'check_construct' instead.

        Validates the data against SCHEMA, if one is defined on the class.

        :return: True (validation failures are raised by the module-level ``validate``)
        """
        warnings.warn("DEPRECATED - use 'check_construct' instead.", DeprecationWarning)
        if self.SCHEMA is not None:
            validate(self.data, self.SCHEMA)
        return True

    def custom_validate(self):
        """Hook called at the end of construction; does nothing by default."""
        pass

    def populate(self, fields_and_values):
        """Set each key/value pair of the supplied dict as an attribute on this object."""
        for k, v in fields_and_values.items():
            setattr(self, k, v)

    def clone(self):
        """Return a new instance of this object's class, built from a deep copy of the data."""
        return self.__class__(deepcopy(self.data))

    def json(self):
        """Return the data serialised as a JSON string."""
        return json.dumps(self.data)

    def get_struct(self):
        """Return the struct this object is using (may be None)."""
        return self._struct

    def _get_internal_property(self, path, wrapper=None):
        """
        Retrieve the value at the path, using the struct (if any) to decide how.

        :param path: dot-notation path into the data
        :param wrapper: class used to wrap dicts (and lists of dicts) before they are returned
        :raises AttributeError: if there is no value and the struct does not describe the path
        """
        # pull the object from the structure, to find out what kind of retrieve it needs
        # (if there is a struct)
        type, substruct, instructions = None, None, None
        if self._struct:
            type, substruct, instructions = construct_lookup(path, self._struct)

        if type is None:
            # if there is no struct, or no object mapping was found, try to pull the path
            # as a single node (may be a field, list or dict, we'll find out in a mo)
            val = self._get_single(path)

            # if this is a dict or a list and a wrapper is supplied, wrap it
            if wrapper is not None:
                if isinstance(val, dict):
                    return wrapper(val, expose_data=self._expose_data)
                elif isinstance(val, list) and len(val) > 0:
                    if isinstance(val[0], dict):   # just check the first one
                        return [wrapper(v, expose_data=self._expose_data) for v in val]

            # otherwise, return the raw value if it is not None, or raise an AttributeError
            if val is None:
                raise AttributeError('{name} is not set'.format(name=path))

            return val

        if instructions is None:
            instructions = {}

        # if the struct contains a reference to the path, always return something, even if it is None - don't raise an AttributeError
        kwargs = construct_kwargs(type, "get", instructions)
        coerce_fn = self._coerce_map.get(instructions.get("coerce"))
        if coerce_fn is not None:
            kwargs["coerce"] = coerce_fn

        if type == "field":
            return self._get_single(path, **kwargs)
        elif type == "object":
            d = self._get_single(path, **kwargs)
            if wrapper:
                return wrapper(d, substruct, construct_raw=False, expose_data=self._expose_data)     # FIXME: this means all substructures are forced to use this classes expose_data policy, whatever it is
            else:
                return d
        elif type == "list":
            if instructions.get("contains") == "field":
                return self._get_list(path, **kwargs)
            elif instructions.get("contains") == "object":
                l = self._get_list(path, **kwargs)
                if wrapper:
                    return [wrapper(o, substruct, construct_raw=False, expose_data=self._expose_data) for o in l]    # FIXME: this means all substructures are forced to use this classes expose_data policy, whatever it is
                else:
                    return l

        # if for whatever reason we get here, raise the AttributeError
        raise AttributeError('{name} is not set'.format(name=path))

    def _set_internal_property(self, path, value, wrapper=None):
        """
        Set the value at the path, using the struct (if any) to decide how.

        :param path: dot-notation path into the data
        :param value: the value to set; DataObj instances are unwrapped to their data
        :param wrapper: class which object values must be (or be convertible to)
        :return: True if the value was set, False if it was not
        :raises AttributeError: if the value is not acceptable to the wrapper
        """

        def _wrap_validate(val, wrap, substruct):
            # reduce the value to plain data, checking it against the wrapper class if there is one
            if wrap is None:
                if isinstance(val, DataObj):
                    return val.data
                else:
                    return val

            else:
                if isinstance(val, DataObj):
                    if isinstance(val, wrap):
                        return val.data
                    else:
                        raise AttributeError("Attempt to set {x} failed; is not of an allowed type.".format(x=path))
                else:
                    try:
                        d = wrap(val, substruct)
                        return d.data
                    except DataStructureException as e:
                        raise AttributeError(str(e))

        # pull the object from the structure, to find out what kind of retrieve it needs
        # (if there is a struct)
        type, substruct, instructions = None, None, None
        if self._struct:
            type, substruct, instructions = construct_lookup(path, self._struct)

        # if no type is found, then this means that either the struct was undefined, or the
        # path did not point to a valid point in the struct.  In the case that the struct was
        # defined, this means the property is trying to set something outside the struct, which
        # isn't allowed.  So, only set types which are None against objects which don't define
        # the struct.
        if type is None:
            if self._struct is None:
                if isinstance(value, list):
                    value = [_wrap_validate(v, wrapper, None) for v in value]
                    self._set_list(path, value)
                else:
                    value = _wrap_validate(value, wrapper, None)
                    self._set_single(path, value)

                return True
            else:
                return False

        if instructions is None:
            instructions = {}

        kwargs = construct_kwargs(type, "set", instructions)
        coerce_fn = self._coerce_map.get(instructions.get("coerce"))
        if coerce_fn is not None:
            kwargs["coerce"] = coerce_fn

        if type == "field":
            self._set_single(path, value, **kwargs)
            return True
        elif type == "object":
            v = _wrap_validate(value, wrapper, substruct)
            self._set_single(path, v, **kwargs)
            return True
        elif type == "list":
            if instructions.get("contains") == "field":
                self._set_list(path, value, **kwargs)
                return True
            elif instructions.get("contains") == "object":
                if not isinstance(value, list):
                    value = [value]
                vals = [_wrap_validate(v, wrapper, substruct) for v in value]
                self._set_list(path, vals, **kwargs)
                return True

        return False

    def _list_dynamic_properties(self):
        """
        List the names which may be accessed dynamically on this object.

        :return: tuple of (property names from _properties, names exposed from the data/struct)
        """
        # Read the internal attributes with object.__getattribute__ rather than via self.
        # This method is called from __getattr__, so a normal lookup of an attribute which
        # has not been set yet would re-enter __getattr__ and recurse without end, instead
        # of raising the AttributeError which the handlers below expect.
        og = object.__getattribute__

        # list the dynamic properties the object could have
        props = []
        try:
            props = list(og(self, "_properties").keys())
        except AttributeError:
            pass

        data_attrs = []
        try:
            if og(self, "_expose_data"):
                struct = og(self, "_struct")
                if struct:
                    data_attrs = construct_data_keys(struct)
                else:
                    data_attrs = list(og(self, "data").keys())
        except AttributeError:
            pass

        return props, data_attrs

    def _add_struct(self, struct):
        """Merge the supplied struct into the current one, or adopt it if that is not possible."""
        # if the struct is not yet set, set it
        try:
            object.__getattribute__(self, "_struct")
            self._struct = construct_merge(self._struct, struct)
        except Exception:
            # either there is no struct yet, or the merge failed; in both cases use the new struct
            self._struct = struct

    def _get_path(self, path, default):
        """
        Walk the dot-notation path through the data.

        :return: the value at the path, or ``default`` if the final key is absent
        """
        parts = path.split(".")
        context = self.data

        for i in range(len(parts)):
            p = parts[i]
            # missing intermediate nodes are treated as empty dicts, so the walk can continue
            d = {} if i < len(parts) - 1 else default
            context = context.get(p, d)
        return context

    def _set_path(self, path, val):
        """Set the value at the dot-notation path, creating intermediate dicts as needed."""
        parts = path.split(".")
        context = self.data

        for i in range(len(parts)):
            p = parts[i]

            if p not in context and i < len(parts) - 1:
                context[p] = {}
                context = context[p]
            elif p in context and i < len(parts) - 1:
                context = context[p]
            else:
                context[p] = val

    def _delete_from_list(self, path, val=None, matchsub=None, prune=True, apply_struct_on_matchsub=True):
        """
        Remove entries from the list at the path, either those equal to ``val`` or
        those which match every key/value in ``matchsub``.

        Note that matchsub will be coerced with the struct if it exists, to ensure
        that the match is done correctly

        :param path: dot-notation path to the list
        :param val: remove entries equal to this value (takes precedence over matchsub)
        :param matchsub: dict of key/values which an entry must all match to be removed
        :param prune: if the list ends up empty, delete it (and prune empty parents)
        :param apply_struct_on_matchsub: whether to attempt the struct coercion of matchsub
        :return:
        """
        l = self._get_list(path)

        removes = []
        i = 0
        for entry in l:
            if val is not None:
                if entry == val:
                    removes.append(i)
            elif matchsub is not None:
                # attempt to coerce the sub
                if apply_struct_on_matchsub:
                    try:
                        object.__getattribute__(self, "_struct")
                        type, struct, instructions = construct_lookup(path, self._struct)
                        if struct is not None:
                            matchsub = construct(matchsub, struct, self._coerce_map)
                    except Exception:
                        # best effort only: if the coercion can't be done, match on the sub as supplied
                        pass

                matches = 0
                for k, v in matchsub.items():
                    if entry.get(k) == v:
                        matches += 1
                if matches == len(list(matchsub.keys())):
                    removes.append(i)
            i += 1

        # delete from the end backwards, so the remaining indices stay valid
        removes.sort(reverse=True)
        for r in removes:
            del l[r]

        if len(l) == 0 and prune:
            self._delete(path, prune)

    def _delete(self, path, prune=True):
        """
        Delete the value at the dot-notation path.

        :param prune: if True, also remove empty dicts left behind in the nodes walked through
        """
        parts = path.split(".")
        context = self.data

        stack = []
        for i in range(len(parts)):
            p = parts[i]
            if p in context:
                if i < len(parts) - 1:
                    stack.append(context[p])
                    context = context[p]
                else:
                    del context[p]
                    if prune and len(stack) > 0:
                        stack.pop() # the last element was just deleted
                        self._prune_stack(stack)

    def _prune_stack(self, stack):
        """Working from the end of the stack, remove empty-dict values from each dict on it."""
        while len(stack) > 0:
            context = stack.pop()
            todelete = []
            for k, v in context.items():
                if isinstance(v, dict) and len(list(v.keys())) == 0:
                    todelete.append(k)
            for d in todelete:
                del context[d]

    def _coerce(self, val, cast, accept_failure=False):
        """
        Apply the cast function to the value.

        :param accept_failure: if True, return the original value when the cast fails
        :raises DataSchemaException: if the cast raises ValueError/TypeError and failure is not accepted
        """
        if cast is None:
            return val
        try:
            return cast(val)
        except (ValueError, TypeError):
            if accept_failure:
                return val
            raise DataSchemaException("Cast with {x} failed on '{y}' of type {z}".format(x=cast, y=val, z=type(val)))

    def _get_single(self, path, coerce=None, default=None, allow_coerce_failure=True):
        """Return the value at the path (or ``default``), coerced if a coerce function is given."""
        # get the value at the point in the object
        val = self._get_path(path, default)

        if coerce is not None and val is not None:
            # if you want to coerce and there is something to coerce do it
            return self._coerce(val, coerce, accept_failure=allow_coerce_failure)
        else:
            # otherwise return the value
            return val

    def _get_list(self, path, coerce=None, by_reference=True, allow_coerce_failure=True):
        """
        Return the list at the path.

        :param coerce: function applied to every entry, if given
        :param by_reference: if True the returned list is the one held in the data (a missing
            list is created and stored first); if False a copy or new list is returned
        :raises DataSchemaException: if the value at the path is not a list
        """
        # get the value at the point in the object
        val = self._get_path(path, None)

        # if there is no value and we want to do by reference, then create it, bind it and return it
        if val is None and by_reference:
            mylist = []
            self._set_single(path, mylist)
            return mylist

        # otherwise, default is an empty list
        elif val is None and not by_reference:
            return []

        # check that the val is actually a list
        if not isinstance(val, list):
            raise DataSchemaException("Expecting a list at {x} but found {y}".format(x=path, y=val))

        # if there is a value, do we want to coerce each of them
        if coerce is not None:
            coerced = [self._coerce(v, coerce, accept_failure=allow_coerce_failure) for v in val]
            if by_reference:
                self._set_single(path, coerced)
            return coerced
        else:
            if by_reference:
                return val
            else:
                return deepcopy(val)

    def _set_single(self, path, val, coerce=None, allow_coerce_failure=False, allowed_values=None, allowed_range=None,
                    allow_none=True, ignore_none=False):
        """
        Set a single value at the path, after applying the supplied constraints.

        :param coerce: function applied to the value (None is never coerced)
        :param allowed_values: if given, the value must be one of these
        :param allowed_range: if given, a (lower, upper) tuple; either bound may be None
        :param allow_none: whether None may be set
        :param ignore_none: if True, a None value is silently not set
        :raises DataSchemaException: if any constraint is violated
        """

        if val is None and ignore_none:
            return

        if val is None and not allow_none:
            raise DataSchemaException("NoneType is not allowed at {x}".format(x=path))

        # first see if we need to coerce the value (and don't coerce None)
        if coerce is not None and val is not None:
            val = self._coerce(val, coerce, accept_failure=allow_coerce_failure)

        if allowed_values is not None and val not in allowed_values:
            raise DataSchemaException("Value {x} is not permitted at {y}".format(x=val, y=path))

        if allowed_range is not None:
            lower, upper = allowed_range
            if (lower is not None and val < lower) or (upper is not None and val > upper):
                raise DataSchemaException("Value {x} is outside the allowed range: {l} - {u}".format(x=val, l=lower, u=upper))

        # now set it at the path point in the object
        self._set_path(path, val)

    def _set_list(self, path, val, coerce=None, allow_coerce_failure=False, allow_none=True, ignore_none=False):
        """
        Set a list at the path; a non-list value is wrapped in a list first.

        :param coerce: function applied to every entry
        :param allow_none: whether None entries (and an empty list) are permitted
        :param ignore_none: if True, None entries are dropped, and an empty result is not set
        :raises DataSchemaException: if a None entry or empty list is not permitted
        """
        # first ensure that the value is a list
        if not isinstance(val, list):
            val = [val]

        # now carry out the None check
        # for each supplied value, if it is none, and none is not allowed, raise an error if we do not
        # plan to ignore the nones.
        for v in val:
            if v is None and not allow_none:
                if not ignore_none:
                    raise DataSchemaException("NoneType is not allowed at {x}".format(x=path))

        # now coerce each of the values, stripping out Nones if necessary
        val = [self._coerce(v, coerce, accept_failure=allow_coerce_failure) for v in val if v is not None or not ignore_none]

        # check that the cleaned array isn't empty, and if it is behave appropriately
        if len(val) == 0:
            # this is equivalent to a None, so we need to decide what to do
            if ignore_none:
                # if we are ignoring nones, just do nothing
                return
            elif not allow_none:
                # if we are not ignoring nones, and not allowing them, raise an error
                raise DataSchemaException("Empty array not permitted at {x}".format(x=path))

        # now set it on the path
        self._set_path(path, val)

    def _add_to_list(self, path, val, coerce=None, allow_coerce_failure=False, allow_none=False, allowed_values=None, ignore_none=True, unique=False):
        """
        Append a value to the list at the path (the list is created if it is missing).

        :param coerce: function applied to the value before it is added
        :param allowed_values: if given, the (uncoerced) value must be one of these
        :param ignore_none: if True, a None value is silently not added
        :param unique: if True, the value is not added when already present
        :raises DataSchemaException: if the value is not permitted
        """
        if val is None and ignore_none:
            return

        if val is None and not allow_none:
            raise DataSchemaException("NoneType is not allowed in list at {x}".format(x=path))

        if allowed_values is not None and val not in allowed_values:
            raise DataSchemaException("Value {x} is not permitted at {y}".format(x=val, y=path))

        # first coerce the value
        if coerce is not None:
            val = self._coerce(val, coerce, accept_failure=allow_coerce_failure)
        current = self._get_list(path, by_reference=True)

        # if we require the list to be unique, check for the value first
        if unique:
            if val in current:
                return

        # otherwise, append
        current.append(val)

    def _set_with_struct(self, path, val):
        """Set the value at the path, applying the coerce/construct rules the struct gives for it."""
        type, struct, instructions = construct_lookup(path, self._struct)
        if type == "field":
            kwargs = construct_kwargs(type, "set", instructions)
            coerce_fn = self._coerce_map.get(instructions.get("coerce", "unicode"))
            self._set_single(path, val, coerce=coerce_fn, **kwargs)
        elif type == "list":
            if not isinstance(val, list):
                val = [val]
            if struct is not None:
                val = [construct(x, struct, self._coerce_map) for x in val]
            kwargs = construct_kwargs(type, "set", instructions)
            coerce_fn = self._coerce_map.get(instructions.get("coerce"))
            self._set_list(path, val, coerce=coerce_fn, **kwargs)
        elif type == "object":
            if struct is not None:
                val = construct(val, struct, self._coerce_map)
            self._set_single(path, val)

    def _add_to_list_with_struct(self, path, val):
        """
        Append the value to the list at the path, applying the struct's rules for it.

        :raises DataStructureException: if the struct does not define the path as a list
        """
        type, struct, instructions = construct_lookup(path, self._struct)
        if type != "list":
            raise DataStructureException("Attempt to add to list {x} failed - it is not a list element".format(x=path))
        if struct is not None:
            val = construct(val, struct, self._coerce_map)
        kwargs = construct_kwargs(type, "set", instructions)
        self._add_to_list(path, val, **kwargs)

    def _utf8_unicode(self):
        """
        DEPRECATED - use dataobj.to_unicode() instead
        """
        return to_unicode()

    def _int(self):
        """
        DEPRECATED - use dataobj.to_int() instead
        """
        return to_int()

    def _float(self):
        """
        DEPRECATED - use dataobj.to_float() instead
        """
        return to_float()

    def _date_str(self, in_format=None, out_format=None):
        """
        DEPRECATED - use dataobj.date_str instead
        """
        return date_str(in_format=in_format, out_format=out_format)
897############################################################
898## Primitive object schema validation
class ObjectSchemaValidationError(DataObjException):
    """
    Raised by the (deprecated) ``validate`` function when an object does not
    conform to the schema it is checked against.
    """
def validate(obj, schema):
    """
    DEPRECATED - use 'construct' instead.

    Check the object against a primitive schema, which may list permitted keys
    under "bools", "fields", "lists" and "objects", and sub-schemas for list
    entries and objects under "list_entries" and "object_entries".

    :param obj: the dict to check
    :param schema: the schema dict to check against
    :return: None
    :raises ObjectSchemaValidationError: on the first key or value which does not conform
    """
    warnings.warn("DEPRECATED - use 'construct' instead.", DeprecationWarning)

    bools = schema.get("bools", [])
    fields = schema.get("fields", [])
    lists = schema.get("lists", [])
    objects = schema.get("objects", [])

    # all fields
    allowed = bools + fields + lists + objects

    # exact type matches are required for plain values (so, for example, a bool is not accepted as an int)
    plain_types = (str, int, float)

    for k, v in obj.items():
        # is k allowed at all
        if k not in allowed:
            raise ObjectSchemaValidationError("object contains key " + k + " which is not permitted by schema")

        # check the bools are bools
        if k in bools and type(v) != bool:
            raise ObjectSchemaValidationError("object contains " + k + " = " + str(v) + " but expected boolean")

        # check that the fields are plain old strings
        if k in fields and type(v) not in plain_types:
            raise ObjectSchemaValidationError("object contains " + k + " = " + str(v) + " but expected string, unicode or a number")

        # check that the lists are really lists
        if k in lists:
            if type(v) != list:
                raise ObjectSchemaValidationError("object contains " + k + " = " + str(v) + " but expected list")

            # if it is a list, then for each member validate
            entry_schema = schema.get("list_entries", {}).get(k)
            for e in v:
                if entry_schema is not None:
                    # validate each entry against the schema
                    validate(e, entry_schema)
                elif type(e) not in plain_types:
                    # with no entry schema, the entries are validated as fields
                    raise ObjectSchemaValidationError("list in object contains " + str(type(e)) + " but expected string, unicode or a number in " + k)

        # check that the objects are objects
        if k in objects:
            if type(v) != dict:
                raise ObjectSchemaValidationError("object contains " + k + " = " + str(v) + " but expected object/dict")

            # if it is an object, then validate; where there is no entry for
            # the object we are not imposing a schema on it
            object_schema = schema.get("object_entries", {}).get(k)
            if object_schema is not None:
                validate(v, object_schema)
961############################################################
962## Data structure coercion
class DataStructureException(DataObjException):
    """
    Raised when data does not fit the structure expected of it, for example
    when attempting to add to a path which the struct does not define as a list.
    """
class ConstructException(DataObjException):
    """
    Raised by ``construct_validate`` when a struct definition is itself malformed.
    """
class ScriptTagFoundException(DataObjException):
    """
    Raised when a script tag has been found in metadata.
    """
def construct_validate(struct, context=""):
    """
    Check that a struct definition is well formed, recursing into any nested structs.

    A struct has the form

    {
        "fields" : {
            "field_name" : {"coerce" : "coerce_function", **kwargs}
        },
        "objects" : [
            "field_name"
        ],
        "lists" : {
            "field_name" : {"contains" : "object|field", "coerce" : "field_coerce_function", **kwargs}
        },
        "required" : ["field_name"],
        "structs" : {
            "field_name" : {
                <construct>
            }
        }
    }

    :param struct: the struct definition to check
    :param context: dotted path to the struct being checked, used in error messages; the empty
        string (the default) is reported as "root"
    :return: True if no problem was found
    :raises ConstructException: describing the first problem encountered
    """
    # the label under which this level of the struct is reported in every error message
    where = context if context != "" else "root"

    def _require_string_or_list_arguments(owner, arguments, template):
        # every argument attached to a field or list definition must be a string or a list
        for arg_name, arg_value in arguments.items():
            if not isinstance(arg_value, (list, str)):
                raise ConstructException(template.format(a=arg_name, b=owner, c=where))

    # only the known top-level keys may be present
    permitted = ("fields", "objects", "lists", "required", "structs")
    for key in struct.keys():
        if key not in permitted:
            raise ConstructException("Key '{x}' present in struct at '{y}', but is not permitted".format(x=key, y=where))

    # each field must name its coerce function, and carry only string/list arguments
    for name, spec in struct.get("fields", {}).items():
        if "coerce" not in spec:
            raise ConstructException("Coerce function not listed in field '{x}' at '{y}'".format(x=name, y=where))
        _require_string_or_list_arguments(name, spec, "Argument '{a}' in field '{b}' at '{c}' is not a string or list")

    # the object list may only contain field names
    if not all(isinstance(entry, str) for entry in struct.get("objects", [])):
        raise ConstructException("There is a non-string value in the object list at '{y}'".format(y=where))

    # each list must say what it contains, and carry only string/list arguments
    for name, spec in struct.get("lists", {}).items():
        contains = spec.get("contains")
        if contains is None:
            raise ConstructException("No 'contains' argument in list definition for field '{x}' at '{y}'".format(x=name, y=where))
        if contains not in ("object", "field"):
            raise ConstructException("'contains' argument in list '{x}' at '{y}' contains illegal value '{z}'".format(x=name, y=where, z=contains))
        _require_string_or_list_arguments(name, spec, "Argument '{a}' in list '{b}' at '{c}' is not a string or list")

    # the required list may only contain field names
    if not all(isinstance(entry, str) for entry in struct.get("required", [])):
        raise ConstructException("There is a non-string value in the required list at '{y}'".format(y=where))

    # nested structs may only be attached to keys already declared as objects or lists
    nested = struct.get("structs", {})
    declared = struct.get("objects", []) + list(struct.get("lists", {}).keys())
    for name in nested:
        if name not in declared:
            raise ConstructException("struct contains key '{a}' which is not listed in object or list definitions at '{x}'".format(a=name, x=where))

    # finally validate each nested struct, extending the dotted context as we descend
    for name, substruct in nested.items():
        child_context = name if context == "" else context + "." + name
        construct_validate(substruct, context=child_context)

    return True
def construct(obj, struct, coerce, context="", silent_prune=False, maintain_reference=False):
    """
    Build a copy of obj containing only what the struct describes, with field values passed
    through their coerce functions, recursing into nested objects and lists of objects.

    The struct has the form

    {
        "fields" : {
            "field_name" : {"coerce" : "coerce_function", **kwargs}
        },
        "objects" : [
            "field_name"
        ],
        "lists" : {
            "field_name" : {"contains" : "object|field", "coerce" : "field_coerce_function", **kwargs}
        },
        "required" : ["field_name"],
        "structs" : {
            "field_name" : {
                <construct>
            }
        }
    }

    Keys of obj whose value is None are skipped.  Objects, and objects inside lists, which have no
    entry under "structs" are deep-copied as they are.

    :param obj: the data to construct from; must provide keys() and get().  None is passed straight through.
    :param struct: the struct definition describing the permitted shape of obj
    :param coerce: mapping of coerce function name to callable; a definition that names no
        coerce function is looked up under "unicode"
    :param context: path prefix used in error messages; recursive calls pass a prefix ending in "."
    :param silent_prune: when False (the default) a key of obj that is not one of the struct's fields,
        objects or lists raises an exception; when True such keys are simply not copied
    :param maintain_reference: when True, obj itself is cleared, refilled with the constructed data
        and returned; otherwise the newly constructed data is returned and obj is left alone
    :return: None if obj is None, otherwise the constructed data (see maintain_reference)
    :raises DataStructureException: if obj does not conform to the struct, or a value is rejected
        when it is stored
    """
    if obj is None:
        return None

    # label used by the messages that report on this level as a whole
    where = context if context != "" else "root"

    # anything that cannot give us its keys is not an object.  Exception rather than a bare except,
    # so that KeyboardInterrupt/SystemExit are not converted into data errors.
    try:
        keys = list(obj.keys())
    except Exception as e:
        raise DataStructureException("Expected an object at {c} but found something else instead".format(c=where)) from e

    # check that all the required fields are there
    for r in struct.get("required", []):
        if r not in keys:
            raise DataStructureException("Field '{r}' is required but not present at '{c}'".format(r=r, c=where))

    # check that there are no fields that are not allowed
    # Note that since the construct mechanism copies fields explicitly, silent_prune literally just turns off this
    # check
    if not silent_prune:
        allowed = list(struct.get("fields", {}).keys()) + struct.get("objects", []) + list(struct.get("lists", {}).keys())
        for k in keys:
            if k not in allowed:
                raise DataStructureException("Field '{k}' is not permitted at '{c}'".format(k=k, c=where))

    # this is the new object we'll be creating from the old
    constructed = DataObj()

    # now check all the fields
    for field_name, instructions in struct.get("fields", {}).items():
        val = obj.get(field_name)
        if val is None:
            continue
        coerce_name = instructions.get("coerce", "unicode")
        coerce_fn = coerce.get(coerce_name)
        if coerce_fn is None:
            raise DataStructureException("No coersion function defined for type '{x}' at '{c}'".format(x=coerce_name, c=context + field_name))

        # everything in the instructions other than the coerce name is handed to the setter
        kwargs = construct_kwargs("field", "set", instructions)

        try:
            constructed._set_single(field_name, val, coerce=coerce_fn, **kwargs)
        except DataSchemaException as e:
            raise DataStructureException("Schema exception at '{a}', {b}".format(a=context + field_name, b=str(e))) from e

    # next check all the objects (which will involve a recursive call to this function)
    for field_name in struct.get("objects", []):
        val = obj.get(field_name)
        if val is None:
            continue
        # exact type check: only plain dicts are accepted here, subclasses are not
        if type(val) != dict:
            raise DataStructureException("Found '{x}' = '{y}' but expected object/dict".format(x=context + field_name, y=val))

        instructions = struct.get("structs", {}).get(field_name)

        if instructions is None:
            # this is the lowest point at which we have instructions, so just accept the data structure as-is
            # (taking a deep copy to destroy any references)
            beneath = deepcopy(val)
        else:
            # we need to recurse further down
            beneath = construct(val, instructions, coerce=coerce, context=context + field_name + ".", silent_prune=silent_prune)

        # either way we now have the sub-data structure to store
        try:
            constructed._set_single(field_name, beneath)
        except DataSchemaException as e:
            raise DataStructureException(str(e)) from e

    # now check all the lists
    for field_name, instructions in struct.get("lists", {}).items():
        vals = obj.get(field_name)
        if vals is None:
            continue
        if not isinstance(vals, list):
            raise DataStructureException("Expecting list at {x} but found something else".format(x=context + field_name))

        # prep the keyword arguments for the setters
        kwargs = construct_kwargs("list", "set", instructions)

        contains = instructions.get("contains")
        if contains == "field":
            # coerce all the values in the list
            coerce_name = instructions.get("coerce", "unicode")
            coerce_fn = coerce.get(coerce_name)
            if coerce_fn is None:
                raise DataStructureException("No coersion function defined for type '{x}' at '{c}'".format(x=coerce_name, c=context + field_name))

            for val in vals:
                try:
                    constructed._add_to_list(field_name, val, coerce=coerce_fn, **kwargs)
                except DataSchemaException as e:
                    raise DataStructureException(str(e)) from e

        elif contains == "object":
            # the same sub-struct (if there is one) applies to every entry of the list
            subinst = struct.get("structs", {}).get(field_name)

            # for each object in the list, send it for construction
            for i, val in enumerate(vals):
                if type(val) != dict:
                    raise DataStructureException("Found '{x}[{p}]' = '{y}' but expected object/dict".format(x=context + field_name, y=val, p=i))

                if subinst is None:
                    # no instructions for the entries, so accept each as-is (deep copied)
                    beneath = deepcopy(val)
                else:
                    # we need to recurse further down
                    beneath = construct(val, subinst, coerce=coerce, context=context + field_name + "[" + str(i) + "].", silent_prune=silent_prune)

                try:
                    constructed._add_to_list(field_name, beneath)
                except DataSchemaException as e:
                    raise DataStructureException(str(e)) from e

        else:
            raise DataStructureException("Cannot understand structure where list '{x}' elements contain '{y}'".format(x=context + field_name, y=contains))

    if maintain_reference:
        # reuse the caller's object, so references they hold see the constructed data
        obj.clear()
        obj.update(constructed.data)
        return obj
    else:
        return constructed.data
def construct_merge(target, source):
    """
    Combine two struct definitions, giving precedence to the target.

    Anything in source that target does not already define is added to a deep copy of target.
    Where both define a nested struct under "structs" for the same key, those two are merged
    recursively by the same rule.  Neither argument is modified.

    :param target: the struct definition whose entries win when both define the same thing
    :param source: the struct definition supplying additional entries
    :return: a new, merged struct definition
    """
    result = deepcopy(target)

    def _add_missing_definitions(section):
        # "fields" and "lists" map a name to its instructions: copy those the result lacks
        for name, spec in source.get(section, {}).items():
            bucket = result.setdefault(section, {})
            if name not in bucket:
                bucket[name] = deepcopy(spec)

    def _add_missing_names(section):
        # "objects" and "required" are plain lists of names: append those the result lacks
        for name in source.get(section, []):
            bucket = result.setdefault(section, [])
            if name not in bucket:
                bucket.append(name)

    _add_missing_definitions("fields")
    _add_missing_names("objects")
    _add_missing_definitions("lists")
    _add_missing_names("required")

    # nested structs are copied when new, and merged when both sides have one
    for name, substruct in source.get("structs", {}).items():
        nested = result.setdefault("structs", {})
        if name in nested:
            nested[name] = construct_merge(nested[name], substruct)
        else:
            nested[name] = deepcopy(substruct)

    return result
def construct_lookup(path, struct):
    """
    Find the definition of the element at a dotted path within a struct.

    Every segment of the path except the last must name an entry in the "objects" list of the
    struct at its level, and that object must have a nested definition under "structs"; lists
    are not descended into.

    :param path: dot-separated path to the element, e.g. "bibjson.title"
    :param struct: the struct definition to look in
    :return: a tuple of (kind, structure, instructions), which is one of
        ("field", None, instructions) if the path names a field,
        ("list", nested struct or None, instructions) if it names a list,
        ("object", nested struct or None, None) if it names an object,
        (None, None, None) if the path cannot be resolved
    """
    head, has_more, remainder = path.partition(".")

    # if there's more than one path element, we will need to recurse
    if has_more:
        # it has to be an object, in order for the path to still have multiple segments
        if head not in struct.get("objects", []):
            return None, None, None
        substruct = struct.get("structs", {}).get(head)
        if substruct is None:
            # the object has no nested definition, so nothing beneath it can be resolved
            # (previously this recursed with None and failed with an AttributeError)
            return None, None, None
        return construct_lookup(remainder, substruct)

    # a single segment: first check the fields
    instructions = struct.get("fields", {}).get(head)
    if instructions is not None:
        return "field", None, instructions

    # then check the lists
    instructions = struct.get("lists", {}).get(head)
    if instructions is not None:
        return "list", struct.get("structs", {}).get(head), instructions

    # then check the objects
    if head in struct.get("objects", []):
        return "object", struct.get("structs", {}).get(head), None

    return None, None, None
def construct_kwargs(type, dir, instructions):
    """
    Extract from a field or list definition the keyword arguments that apply in one direction.

    The arguments which define the element itself ("coerce" for a field; "coerce" and "contains"
    for a list) are removed.  Of what remains, keys prefixed "get__" are the "get" arguments, and
    all other keys are "set" arguments, optionally marked with a "set__" prefix.  The prefix is
    stripped from the keys of the result.

    :param type: "field" or "list"; any other value removes nothing from the instructions
    :param dir: "set" or "get"; any other value gives an empty result
    :param instructions: the definition from the struct, or None
    :return: a new dict of keyword arguments, holding deep copies of the instruction values
    """
    # if there are no instructions there are no kwargs
    if instructions is None:
        return {}

    # work on a copy, so the struct itself is never modified
    remaining = deepcopy(instructions)

    # drop the arguments that belong to the definition rather than to the getter/setter
    if type == "field":
        reserved = ("coerce",)
    elif type == "list":
        reserved = ("coerce", "contains")
    else:
        reserved = ()
    for name in reserved:
        remaining.pop(name, None)

    if dir == "set":
        # everything is a "set" argument unless explicitly marked as a "get" argument
        return {
            (name[5:] if name.startswith("set__") else name): value
            for name, value in remaining.items()
            if not name.startswith("get__")
        }

    if dir == "get":
        # only the explicitly marked arguments apply
        return {name[5:]: value for name, value in remaining.items() if name.startswith("get__")}

    return {}
def construct_data_keys(struct):
    """
    List every key that data conforming to the struct may hold at this level.

    :param struct: the struct definition
    :return: the names of the struct's fields, then its objects, then its lists
    """
    names = []
    names.extend(struct.get("fields", {}))
    names.extend(struct.get("objects", []))
    names.extend(struct.get("lists", {}))
    return names
def merge_outside_construct(struct, target, source):
    """
    Add to target the parts of source which the struct says nothing about.

    Keys of source that the struct defines as fields or lists are ignored.  Keys that it
    defines as objects are descended into, using the nested struct for that key if there is
    one.  Any other key is deep-copied from source, unless target already has a value for it.
    Neither target nor source is modified.

    :param struct: the struct definition describing which keys are under the struct's control
    :param target: the data to merge into
    :param source: the data to take additional keys from
    :return: a new dict holding the merged data
    """
    result = deepcopy(target)

    for key, value in source.items():
        # fields and lists belong to the struct, so are left exactly as the target has them
        if key in struct.get("fields", {}) or key in struct.get("lists", {}):
            continue

        if key in struct.get("objects", []):
            # objects may hold keys outside the struct further down, so we need to go deeper
            nested_struct = struct.get("structs", {}).get(key, {})
            result[key] = merge_outside_construct(nested_struct, target.get(key, {}), value)
        elif key not in result:
            # not represented at this level in the struct, and not already set on the target
            result[key] = deepcopy(value)

    return result
1354############################################################
1355## Unit test support
def test_dataobj(obj, fields_and_values):
    """
    Test a dataobj to make sure that the getters and setters you have specified
    are working correctly.

    Provide it a data object and a dict of fields with the values to set and the expected return values (if required):

    {
        "key" : ("set value", "get value")
    }

    If you provide only the set value, then the get value will be required to be the same as the set value in the test

    {
        "key" : "set value"
    }

    All of the values are set before any of them is read back.

    :param obj: the object whose attributes are to be exercised
    :param fields_and_values: dict of attribute name to set value, or to a (set value, get value) tuple
    :return: None
    :raises AssertionError: if an attribute cannot be set, or does not read back as expected
    """
    # the failures are raised explicitly rather than with assert statements, so that the
    # checks are still made when Python is run with -O
    for k, valtup in fields_and_values.items():
        if not isinstance(valtup, tuple):
            valtup = (valtup,)
        set_val = valtup[0]
        try:
            setattr(obj, k, set_val)
        except AttributeError:
            raise AssertionError("Unable to set attribute {x} with value {y}".format(x=k, y=set_val))

    for k, valtup in fields_and_values.items():
        if not isinstance(valtup, tuple):
            valtup = (valtup,)
        # with no separate get value, the attribute must read back as it was set
        get_val = valtup[1] if len(valtup) > 1 else valtup[0]
        val = getattr(obj, k)
        if not (val == get_val):
            raise AssertionError((k, val, get_val))


# This is a helper for tests to call, not a test in its own right: the name matches pytest's
# default test_* pattern, so without this marker pytest tries to collect it from any test
# module that imports it, and errors because obj and fields_and_values are not fixtures.
test_dataobj.__test__ = False