Coverage for portality/lib/dataobj.py: 66%
816 statements
« prev ^ index » next coverage.py v6.4.2, created at 2022-07-22 15:59 +0100
« prev ^ index » next coverage.py v6.4.2, created at 2022-07-22 15:59 +0100
1# -*- coding: UTF-8 -*-
3from portality.lib import dates
4from portality.datasets import get_country_code, get_currency_code
5from copy import deepcopy
6import locale, json, warnings
7from urllib.parse import urlparse
8from datetime import date, datetime
10#########################################################
11## Data coerce functions
def to_currency_code(val):
    """
    Coerce a value to a currency code using ``get_currency_code``.

    :param val: the value to look up; ``None`` is passed straight through
    :return: the looked-up code as a string, or ``None`` if ``val`` is ``None``
    :raises ValueError: if ``get_currency_code`` returns ``None`` for the value
    """
    if val is None:
        return None

    code = get_currency_code(val)
    if code is not None:
        # normalise whatever the lookup returned to a string
        return to_unicode()(code)

    raise ValueError("Unable to convert {x} to a valid currency code".format(x=val))
def to_country_code(val):
    """
    Coerce a value to a country code using ``get_country_code``.

    :param val: the value to look up; ``None`` is passed straight through
    :return: the looked-up code as a string, or ``None`` if ``val`` is ``None``
    :raises ValueError: if ``get_country_code`` returns ``None`` for the value
    """
    if val is None:
        return None

    code = get_country_code(val, fail_if_not_found=True)
    if code is not None:
        # normalise whatever the lookup returned to a string
        return to_unicode()(code)

    raise ValueError("Unable to convert {x} to a valid country code".format(x=val))
def to_unicode():
    """
    Build a coerce function which converts values to (unicode) strings.

    The returned function behaves as follows:
      * ``str`` values are returned unchanged
      * ``bytes`` values are decoded strictly as UTF-8
      * anything else is passed through ``str()``

    :return: a function taking a single value and returning a ``str``
    """
    def to_utf8_unicode(val):
        if isinstance(val, str):
            return val

        # byte strings must be decoded; str(b"x") would yield "b'x'" rather than "x"
        if isinstance(val, bytes):
            try:
                return val.decode("utf8", "strict")
            except UnicodeDecodeError:
                raise ValueError("Could not decode string")

        return str(val)

    return to_utf8_unicode
def to_unicode_upper(val):
    """
    Coerce a value to a string with ``to_unicode`` and upper-case the result.

    :param val: the value to convert
    :return: the upper-cased string form of ``val``
    """
    return to_unicode()(val).upper()
def to_int():
    """
    Build a coerce function which converts values to ``int``.

    For strings, the returned function first drops any non-ascii characters
    (e.g. currency marks), then tries in order: a straight ``int()`` cast, a cast
    with commas removed, and finally ``locale.atoi``.

    :return: a function taking a single value and returning an ``int``; it raises
        ``ValueError`` if none of the conversion strategies succeed
    """
    def intify(val):
        # strip any characters that are outside the ascii range - they won't make up the int anyway
        # and this will get rid of things like strange currency marks.  Decode back to str so
        # that the string operations below keep working (bytes.replace(",", "") is a TypeError).
        if isinstance(val, str):
            val = val.encode("ascii", errors="ignore").decode("ascii")

        # try the straight cast
        try:
            return int(val)
        except ValueError:
            pass

        # the remaining strategies only make sense for strings
        if isinstance(val, str):
            # could have commas in it, so try stripping them
            try:
                return int(val.replace(",", ""))
            except ValueError:
                pass

            # try the locale-specific approach
            try:
                return locale.atoi(val)
            except ValueError:
                pass

        raise ValueError("Could not convert string to int: {x}".format(x=val))

    return intify
def to_float():
    """
    Build a coerce function which converts values to ``float``.

    For strings, the returned function first drops any non-ascii characters
    (e.g. currency marks), then tries in order: a straight ``float()`` cast, a cast
    with commas removed, and finally ``locale.atof``.

    :return: a function taking a single value and returning a ``float``; it raises
        ``ValueError`` if none of the conversion strategies succeed
    """
    def floatify(val):
        # strip any characters that are outside the ascii range - they won't make up the float anyway
        # and this will get rid of things like strange currency marks.  Decode back to str so
        # that the string operations below keep working (bytes.replace(",", "") is a TypeError).
        if isinstance(val, str):
            val = val.encode("ascii", errors="ignore").decode("ascii")

        # try the straight cast
        try:
            return float(val)
        except ValueError:
            pass

        # the remaining strategies only make sense for strings
        if isinstance(val, str):
            # could have commas in it, so try stripping them
            try:
                return float(val.replace(",", ""))
            except ValueError:
                pass

            # try the locale-specific approach
            try:
                return locale.atof(val)
            except ValueError:
                pass

        raise ValueError("Could not convert string to float: {x}".format(x=val))

    return floatify
def date_str(in_format=None, out_format=None):
    """
    Build a coerce function which renders dates as strings.

    :param in_format: format passed to ``dates.reformat`` for parsing string input
    :param out_format: format passed to ``dates.format`` / ``dates.reformat`` for output
    :return: a function which returns ``None`` for ``None`` or the empty string, formats
        ``date``/``datetime`` instances, and reformats any other value
    """
    def datify(val):
        # nothing to format
        if val is None or val == "":
            return None

        # real date objects only need formatting; everything else is re-parsed first
        if isinstance(val, (date, datetime)):
            return dates.format(val, format=out_format)
        return dates.reformat(val, in_format=in_format, out_format=out_format)

    return datify
def to_datestamp(in_format=None):
    """
    Build a coerce function which parses values with ``dates.parse``.

    :param in_format: format handed to ``dates.parse`` as its ``format`` argument
    :return: a function taking a single value and returning the result of ``dates.parse``
    """
    def stampify(value):
        parsed = dates.parse(value, format=in_format)
        return parsed

    return stampify
def to_isolang(output_format=None):
    """
    Build a coerce function which converts a language identifier to an ISO form.

    :param output_format: format to output from the looked-up language. Must be one of:
        * alpha3
        * alt3
        * alpha2
        * name
        * fr
        Can be a list in order of preference, too.  Defaults to ``["alpha3"]``.
        fixme: we could make these pycountry's keys, removing the need for so many transformations and intermediate steps
    :return: a function which returns ``None`` for ``None``, raises ``ValueError`` if the
        language cannot be found, and otherwise returns the first non-empty value among the
        requested formats (``None`` if none of them has a value)
    """
    # delayed import, since we may not always want to load the whole dataset for a dataobj
    from portality.lib import isolang as dataset

    # normalise the requested formats to a list of preferences
    if output_format is None:
        output_format = ["alpha3"]
    if not isinstance(output_format, list):
        output_format = [output_format]

    def isolang(val):
        if val is None:
            return None

        entry = dataset.find(val)
        if entry is None:
            raise ValueError("Unable to find iso code for language {x}".format(x=val))

        # first preference with a usable value wins
        for fmt in output_format:
            candidate = entry.get(fmt)
            if candidate is not None and candidate != "":
                return candidate

    return isolang
def to_url(val):
    """
    Coerce a string to a URL, requiring a scheme that starts with "http".

    :param val: the string to check; surrounding whitespace is stripped
    :return: the stripped string (the empty string is returned as-is)
    :raises ValueError: if ``val`` is not a string, or has no scheme starting with "http"
    """
    if not isinstance(val, str):
        raise ValueError("Argument passed to to_url was not a string, but type '{t}': '{val}'".format(t=type(val),val=val))

    val = val.strip()
    if val == '':
        return val

    # parse with urlparse and check the url has the minimum properties that we require
    scheme = urlparse(val).scheme
    if not (scheme and scheme.startswith("http")):
        raise ValueError("Could not convert string {val} to viable URL".format(val=val))

    return to_unicode()(val)
def to_bool(val):
    """
    Conservative boolean cast - don't cast lists and objects to True, just existing booleans and strings.

    :param val: ``None``, a boolean, or a string reading 'true' / 'false' (not case-sensitive)
    :return: ``None`` for ``None``, otherwise the corresponding boolean
    :raises ValueError: for any other string, or any value of another type
    """
    if val is None:
        return None

    # genuine booleans pass straight through
    if isinstance(val, bool):
        return val

    if not isinstance(val, str):
        raise ValueError("Could not convert {val} to boolean. Expect either boolean or string.".format(val=val))

    lowered = val.lower()
    if lowered == 'true':
        return True
    if lowered == 'false':
        return False
    raise ValueError("Could not convert string {val} to boolean. Expecting string to either say 'true' or 'false' (not case-sensitive).".format(val=val))
def string_canonicalise(canon, allow_fail=False):
    """
    Build a coerce function which maps strings onto a canonical spelling.

    Matching ignores surrounding whitespace and case.

    :param canon: iterable of canonical string values
    :param allow_fail: if True, ``None`` and unrecognised values are passed through
        (unrecognised values via ``to_unicode``) instead of raising
    :return: a function taking a single value and returning the canonical string; it
        raises ``ValueError`` for ``None`` or unrecognised values when ``allow_fail`` is
        False, and for values which cannot be treated as strings
    """
    # index the canonical forms by their normalised (stripped, lower-cased) spelling
    normalised = {a.strip().lower(): a for a in canon}

    def sn(val):
        if val is None:
            if allow_fail:
                return None
            raise ValueError("NoneType not permitted")

        # only the failures a non-string value produces are translated; anything else
        # (e.g. KeyboardInterrupt) is allowed to propagate untouched
        try:
            norm = val.strip().lower()
        except (AttributeError, TypeError):
            raise ValueError("Unable to treat value as a string")

        uc = to_unicode()
        if norm in normalised:
            return uc(normalised[norm])
        if allow_fail:
            return uc(val)

        raise ValueError("Unable to canonicalise string")

    return sn
222############################################################
224############################################################
225# The core data object which manages all the interactions
226# with the underlying data member variable
class DataObjException(Exception):
    """
    Base exception for this module; exposes the first positional argument as ``message``.
    """

    def __init__(self, *args, **kwargs):
        # the message is the first positional argument, or empty if none was given
        self.message = args[0] if args else ''
        super().__init__(*args, **kwargs)
class DataSchemaException(DataObjException):
    """
    Raised by the DataObj accessors when a value breaks the rules for its path, e.g. a
    failed coerce, a disallowed None, or a value outside the allowed values or range.
    """
class DataObj(object):
    """
    Class which provides services to other classes which store their internal data
    as a python data structure in the self.data field.

    Instances may carry a struct (self._struct) describing the permitted shape of the data,
    a coerce map (self._coerce_map) of named coerce functions, and a property map
    (self._properties) which exposes paths in the data as dynamic attributes.
    """

    # schema for the deprecated validate() routine; None disables it
    SCHEMA = None

    DEFAULT_COERCE = {
        # NOTE - if you add something to the default coerce, add it to the default swagger
        # translation dict below as well. Furthermore if you're adding
        # custom stuff to the coerce, you will likely need to add an entry
        # to the swagger translation table as well, in the same way you
        # extend the coerce map.
        "unicode": to_unicode(),
        "unicode_upper" : to_unicode_upper,
        "utcdatetime": date_str(),
        "utcdatetimemicros" : date_str(out_format="%Y-%m-%dT%H:%M:%S.%fZ"),
        "bigenddate" : date_str(out_format="%Y-%m-%d"),
        "integer": to_int(),
        "float": to_float(),
        "isolang": to_isolang(),
        "url": to_url,
        "bool": to_bool,
        "isolang_2letter": to_isolang(output_format="alpha2"),
        "country_code": to_country_code,
        "currency_code": to_currency_code,
        "license": string_canonicalise(["CC BY", "CC BY-NC", "CC BY-NC-ND", "CC BY-NC-SA", "CC BY-ND", "CC BY-SA", "Not CC-like"], allow_fail=True),
        "persistent_identifier_scheme": string_canonicalise(["None", "DOI", "Handles", "ARK"], allow_fail=True),
        "format": string_canonicalise(["PDF", "HTML", "ePUB", "XML"], allow_fail=True),
        "deposit_policy": string_canonicalise(["None", "Sherpa/Romeo", "Dulcinea", "OAKlist", "Diadorim"], allow_fail=True),
    }

    def __init__(self, raw=None, struct=None, construct_raw=True, expose_data=False, properties=None, coerce_map=None, construct_silent_prune=False, construct_maintain_reference=False, *args, **kwargs):
        """
        Initialise the object, only setting each internal attribute if a subclass has not
        already set it.

        :param raw: the initial data; defaults to an empty dict
        :param struct: the struct describing the data, if any
        :param construct_raw: whether to run ``construct`` over ``raw`` using the struct
        :param expose_data: whether keys of the data/struct are exposed as dynamic attributes
        :param properties: map of public property name to (path, wrapper class)
        :param coerce_map: map of coerce name to function; defaults to a copy of DEFAULT_COERCE
        :param construct_silent_prune: passed to ``construct`` as ``silent_prune``
        :param construct_maintain_reference: passed to ``construct`` as ``maintain_reference``
        """
        # make a shortcut to the object.__getattribute__ function
        og = object.__getattribute__

        # if no subclass has set the coerce, then set it from default
        try:
            og(self, "_coerce_map")
        except AttributeError:
            self._coerce_map = coerce_map if coerce_map is not None else deepcopy(self.DEFAULT_COERCE)

        # if no subclass has set the struct, initialise it
        try:
            og(self, "_struct")
        except AttributeError:
            self._struct = struct

        # assign the data if not already assigned by subclass
        # NOTE: data is not _data deliberately
        try:
            og(self, "data")
        except AttributeError:
            self.data = {} if raw is None else raw

        # properties to allow automatic object API construction
        # of the form
        #
        # {"<public property name>" : ("<path.to.property>", "<data object wrapper>")
        # e.g
        # {"identifier" : ("bibjson.identifier", DataObj))}
        try:
            og(self, "_properties")
        except AttributeError:
            self._properties = properties if properties is not None else {}

        # if no subclass has set expose_data, set it
        try:
            og(self, "_expose_data")
        except AttributeError:
            self._expose_data = expose_data

        # if no subclass has set _construct_silent_prune, set it
        try:
            og(self, "_construct_silent_prune")
        except AttributeError:
            self._construct_silent_prune = construct_silent_prune

        # if no subclass has set _construct_maintain_reference, set it
        try:
            og(self, "_construct_maintain_reference")
        except AttributeError:
            self._construct_maintain_reference = construct_maintain_reference

        # restructure the object based on the struct if required
        if self._struct is not None and raw is not None and construct_raw:
            self.data = construct(self.data, self._struct, self._coerce_map, silent_prune=construct_silent_prune, maintain_reference=construct_maintain_reference)

        # run against the old validation routine
        # (now deprecated)
        self.validate()

        # run the object's native validation routine
        self.custom_validate()

        # keep a reference to the current data record, in case something up the inheritance chain messes with it
        # (I'm looking at you, UserDict).
        remember_this = self.data

        # finally, kick the request up
        super(DataObj, self).__init__(*args, **kwargs)
        self.data = remember_this

    def __getattr__(self, name):
        """
        Resolve an attribute which normal lookup did not find, via the dynamic properties.

        :raises AttributeError: if the name is not a dynamic property, or has no value
        """
        if hasattr(self.__class__, name):
            return object.__getattribute__(self, name)

        props, data_attrs = self._list_dynamic_properties()

        # if the name is not in the dynamic properties, raise an attribute error
        if name not in props and name not in data_attrs:
            raise AttributeError('{name} is not set'.format(name=name))

        # otherwise, extract the path from the properties list or the internal data
        if name in props:
            path, wrapper = self._properties.get(name)
        else:
            path = name
            wrapper = DataObj

        # request the internal property directly (which will in-turn raise the AttributeError if necessary)
        try:
            return self._get_internal_property(path, wrapper)
        except AttributeError:
            # re-wrap the attribute error with the name, rather than the path
            raise AttributeError('{name} is not set'.format(name=name))

    def __setattr__(self, key, value):
        """
        Set an attribute, routing dynamic properties through to the internal data.
        """
        # first set the attribute on any explicitly defined property
        try:
            if hasattr(self.__class__, key):
                return object.__setattr__(self, key, value)
        except AttributeError:
            pass

        # this could be an internal attribute from the constructor, so we need to make
        # a special case
        if key in ["_coerce_map", "_struct", "data", "_properties", "_expose_data"]:
            return object.__setattr__(self, key, value)

        props, data_attrs = self._list_dynamic_properties()

        # extract the path from the properties list or the internal data
        path = None
        wrapper = None
        if key in props:
            path, wrapper = self._properties.get(key)
        elif key in data_attrs:
            path = key
            wrapper = DataObj

        # try to set the property on the internal object
        if path is not None:
            wasset = self._set_internal_property(path, value, wrapper)
            if wasset:
                return

        # fall back to the default approach of allowing any attribute to be set on the object
        return object.__setattr__(self, key, value)

    def check_construct(self):
        """
        Apply the construct to the internal data and throw errors if it is not validated

        This could be used, for example, if external processes have violated the .data encapsulation, or
        if internal processes which change .data need to be checked to make sure they haven't strayed outside
        their remit

        :return:
        """
        if self._struct is not None and self.data is not None:
            construct(self.data, self._struct, self._coerce_map, silent_prune=False, maintain_reference=False)

    def validate(self):
        """
        DEPRECATED - use 'check_construct' instead.

        :return: True if no validation error was raised
        """
        warnings.warn("DEPRECATED - use 'check_construct' instead.", DeprecationWarning)
        if self.SCHEMA is not None:
            validate(self.data, self.SCHEMA)
        return True

    def custom_validate(self):
        """Hook called from the constructor; does nothing by default."""
        pass

    def populate(self, fields_and_values):
        """Set each key of ``fields_and_values`` as an attribute on this object."""
        for k, v in fields_and_values.items():
            setattr(self, k, v)

    def clone(self):
        """Return a new instance of this class built from a deep copy of the data."""
        return self.__class__(deepcopy(self.data))

    def json(self):
        """Return the data serialised as a JSON string."""
        return json.dumps(self.data)

    def get_struct(self):
        """Return the struct currently in force (may be None)."""
        return self._struct

    def _get_internal_property(self, path, wrapper=None):
        """
        Retrieve the value at ``path``, using the struct (if any) to decide how.

        :param path: dot-separated path into the data
        :param wrapper: class used to wrap dicts (and lists of dicts) before returning
        :raises AttributeError: if there is no struct entry for the path and no value is set
        """
        # pull the object from the structure, to find out what kind of retrieve it needs
        # (if there is a struct)
        type_, substruct, instructions = None, None, None
        if self._struct:
            type_, substruct, instructions = construct_lookup(path, self._struct)

        if type_ is None:
            # if there is no struct, or no object mapping was found, try to pull the path
            # as a single node (may be a field, list or dict, we'll find out in a mo)
            val = self._get_single(path)

            # if this is a dict or a list and a wrapper is supplied, wrap it
            if wrapper is not None:
                if isinstance(val, dict):
                    return wrapper(val, expose_data=self._expose_data)
                elif isinstance(val, list) and len(val) > 0:
                    if isinstance(val[0], dict):  # just check the first one
                        return [wrapper(v, expose_data=self._expose_data) for v in val]

            # otherwise, return the raw value if it is not None, or raise an AttributeError
            if val is None:
                raise AttributeError('{name} is not set'.format(name=path))

            return val

        if instructions is None:
            instructions = {}

        # if the struct contains a reference to the path, always return something, even if it is None - don't raise an AttributeError
        kwargs = construct_kwargs(type_, "get", instructions)
        coerce_fn = self._coerce_map.get(instructions.get("coerce"))
        if coerce_fn is not None:
            kwargs["coerce"] = coerce_fn

        if type_ == "field":
            return self._get_single(path, **kwargs)
        elif type_ == "object":
            d = self._get_single(path, **kwargs)
            if wrapper:
                return wrapper(d, substruct, construct_raw=False, expose_data=self._expose_data)  # FIXME: this means all substructures are forced to use this classes expose_data policy, whatever it is
            else:
                return d
        elif type_ == "list":
            if instructions.get("contains") == "field":
                return self._get_list(path, **kwargs)
            elif instructions.get("contains") == "object":
                l = self._get_list(path, **kwargs)
                if wrapper:
                    return [wrapper(o, substruct, construct_raw=False, expose_data=self._expose_data) for o in l]  # FIXME: this means all substructures are forced to use this classes expose_data policy, whatever it is
                else:
                    return l

        # if for whatever reason we get here, raise the AttributeError
        raise AttributeError('{name} is not set'.format(name=path))

    def _set_internal_property(self, path, value, wrapper=None):
        """
        Set the value at ``path``, using the struct (if any) to decide how.

        :param path: dot-separated path into the data
        :param value: the value to set; DataObj instances are unwrapped to their data
        :param wrapper: class that object values must be (or be constructable as)
        :return: True if the value was set, False if the struct does not permit the path
        """

        def _wrap_validate(val, wrap, substruct):
            if wrap is None:
                if isinstance(val, DataObj):
                    return val.data
                else:
                    return val

            else:
                if isinstance(val, DataObj):
                    if isinstance(val, wrap):
                        return val.data
                    else:
                        raise AttributeError("Attempt to set {x} failed; is not of an allowed type.".format(x=path))
                else:
                    try:
                        d = wrap(val, substruct)
                        return d.data
                    except DataStructureException as e:
                        raise AttributeError(str(e))

        # pull the object from the structure, to find out what kind of retrieve it needs
        # (if there is a struct)
        type_, substruct, instructions = None, None, None
        if self._struct:
            type_, substruct, instructions = construct_lookup(path, self._struct)

        # if no type is found, then this means that either the struct was undefined, or the
        # path did not point to a valid point in the struct.  In the case that the struct was
        # defined, this means the property is trying to set something outside the struct, which
        # isn't allowed.  So, only set types which are None against objects which don't define
        # the struct.
        if type_ is None:
            if self._struct is None:
                if isinstance(value, list):
                    value = [_wrap_validate(v, wrapper, None) for v in value]
                    self._set_list(path, value)
                else:
                    value = _wrap_validate(value, wrapper, None)
                    self._set_single(path, value)

                return True
            else:
                return False

        if instructions is None:
            instructions = {}

        kwargs = construct_kwargs(type_, "set", instructions)
        coerce_fn = self._coerce_map.get(instructions.get("coerce"))
        if coerce_fn is not None:
            kwargs["coerce"] = coerce_fn

        if type_ == "field":
            self._set_single(path, value, **kwargs)
            return True
        elif type_ == "object":
            v = _wrap_validate(value, wrapper, substruct)
            self._set_single(path, v, **kwargs)
            return True
        elif type_ == "list":
            if instructions.get("contains") == "field":
                self._set_list(path, value, **kwargs)
                return True
            elif instructions.get("contains") == "object":
                if not isinstance(value, list):
                    value = [value]
                vals = [_wrap_validate(v, wrapper, substruct) for v in value]
                self._set_list(path, vals, **kwargs)
                return True

        return False

    def _list_dynamic_properties(self):
        """
        List the dynamic properties the object could have.

        :return: tuple of (names from the property map, names exposed from the data/struct)
        """
        # Read the internal attributes with object.__getattribute__ rather than self.<attr>:
        # if one is missing (e.g. on an instance whose __init__ has not run), self.<attr> would
        # go via __getattr__, which calls this method again and recurses without bound.
        og = object.__getattribute__

        props = []
        try:
            props = list(og(self, "_properties").keys())
        except AttributeError:
            pass

        data_attrs = []
        try:
            if og(self, "_expose_data"):
                struct = og(self, "_struct")
                if struct:
                    data_attrs = construct_data_keys(struct)
                else:
                    data_attrs = list(og(self, "data").keys())
        except AttributeError:
            pass

        return props, data_attrs

    def _add_struct(self, struct):
        """Merge ``struct`` into the existing struct, or adopt it if that is not possible."""
        # if the struct is not yet set, set it
        try:
            object.__getattribute__(self, "_struct")
            self._struct = construct_merge(self._struct, struct)
        except Exception:
            # deliberately broad: any failure to look up or merge the existing struct
            # falls back to using the supplied struct on its own
            self._struct = struct

    def _get_path(self, path, default):
        """Walk the dot-separated ``path`` through the data, returning ``default`` if the final key is absent."""
        parts = path.split(".")
        context = self.data
        last = len(parts) - 1

        for i, p in enumerate(parts):
            # missing intermediate nodes are treated as empty dicts
            d = {} if i < last else default
            context = context.get(p, d)
        return context

    def _set_path(self, path, val):
        """Set ``val`` at the dot-separated ``path``, creating intermediate dicts as needed."""
        parts = path.split(".")
        context = self.data
        last = len(parts) - 1

        for i, p in enumerate(parts):
            if i < last:
                if p not in context:
                    context[p] = {}
                context = context[p]
            else:
                context[p] = val

    def _delete_from_list(self, path, val=None, matchsub=None, prune=True, apply_struct_on_matchsub=True):
        """
        Remove entries from the list at ``path``.

        Note that matchsub will be coerced with the struct if it exists, to ensure
        that the match is done correctly

        :param path: dot-separated path to the list
        :param val: remove entries equal to this value (takes precedence over matchsub)
        :param matchsub: dict; remove entries whose values match on every key given
        :param prune: if the list ends up empty, delete it (and prune empty parents)
        :param apply_struct_on_matchsub: whether to coerce matchsub using the struct
        :return:
        """
        l = self._get_list(path)

        # attempt to coerce the sub.  This is the same for every entry, so do it once up
        # front, and only when matchsub is actually going to be used.
        if val is None and matchsub is not None and apply_struct_on_matchsub and len(l) > 0:
            try:
                object.__getattribute__(self, "_struct")
                type_, struct, instructions = construct_lookup(path, self._struct)
                if struct is not None:
                    matchsub = construct(matchsub, struct, self._coerce_map)
            except Exception:
                # best effort: if the coerce is not possible, match on the raw matchsub
                pass

        removes = []
        for i, entry in enumerate(l):
            if val is not None:
                if entry == val:
                    removes.append(i)
            elif matchsub is not None:
                if all(entry.get(k) == v for k, v in matchsub.items()):
                    removes.append(i)

        # delete from the back so that earlier indices stay valid
        for r in reversed(removes):
            del l[r]

        if len(l) == 0 and prune:
            self._delete(path, prune)

    def _delete(self, path, prune=True):
        """
        Delete the value at the dot-separated ``path``, if it exists.

        :param path: dot-separated path to delete
        :param prune: whether to also remove dicts left empty on the way to the deleted value
        """
        parts = path.split(".")
        context = self.data
        last = len(parts) - 1

        stack = []
        for i, p in enumerate(parts):
            if p not in context:
                # the path does not exist; stop here rather than testing the remaining
                # parts against the wrong container (which could delete an unrelated key)
                return
            if i < last:
                stack.append(context[p])
                context = context[p]
            else:
                del context[p]
                if prune and len(stack) > 0:
                    stack.pop()  # drop the container we just deleted from; its parents do the pruning
                    self._prune_stack(stack)

    def _prune_stack(self, stack):
        """Working back up ``stack``, delete any directly-held dicts which are empty."""
        while len(stack) > 0:
            context = stack.pop()
            todelete = []
            for k, v in context.items():
                if isinstance(v, dict) and len(list(v.keys())) == 0:
                    todelete.append(k)
            for d in todelete:
                del context[d]

    def _coerce(self, val, cast, accept_failure=False):
        """
        Apply the ``cast`` function to ``val``.

        :return: the cast value; ``val`` unchanged if ``cast`` is None, or if the cast fails
            and ``accept_failure`` is set
        :raises DataSchemaException: if the cast raises ValueError/TypeError and failure is not accepted
        """
        if cast is None:
            return val
        try:
            return cast(val)
        except (ValueError, TypeError):
            if accept_failure:
                return val
            raise DataSchemaException("Cast with {x} failed on '{y}' of type {z}".format(x=cast, y=val, z=type(val)))

    def _get_single(self, path, coerce=None, default=None, allow_coerce_failure=True):
        """Return the value at ``path`` (or ``default``), coerced if a coerce function is given."""
        # get the value at the point in the object
        val = self._get_path(path, default)

        if coerce is not None and val is not None:
            # if you want to coerce and there is something to coerce do it
            return self._coerce(val, coerce, accept_failure=allow_coerce_failure)
        else:
            # otherwise return the value
            return val

    def _get_list(self, path, coerce=None, by_reference=True, allow_coerce_failure=True):
        """
        Return the list at ``path``.

        :param coerce: function applied to each entry, if given
        :param by_reference: if True the returned list is the one held in the data (created and
            stored if absent); if False a copy (or a fresh empty list) is returned
        :raises DataSchemaException: if the value at the path is not a list
        """
        # get the value at the point in the object
        val = self._get_path(path, None)

        # if there is no value and we want to do by reference, then create it, bind it and return it
        if val is None and by_reference:
            mylist = []
            self._set_single(path, mylist)
            return mylist

        # otherwise, default is an empty list
        elif val is None and not by_reference:
            return []

        # check that the val is actually a list
        if not isinstance(val, list):
            raise DataSchemaException("Expecting a list at {x} but found {y}".format(x=path, y=val))

        # if there is a value, do we want to coerce each of them
        if coerce is not None:
            coerced = [self._coerce(v, coerce, accept_failure=allow_coerce_failure) for v in val]
            if by_reference:
                self._set_single(path, coerced)
            return coerced
        else:
            if by_reference:
                return val
            else:
                return deepcopy(val)

    def _set_single(self, path, val, coerce=None, allow_coerce_failure=False, allowed_values=None, allowed_range=None,
                    allow_none=True, ignore_none=False):
        """
        Set a single value at ``path`` after applying the supplied constraints.

        :raises DataSchemaException: if None is not allowed, the coerce fails, or the value is
            outside ``allowed_values`` / ``allowed_range``
        """
        if val is None and ignore_none:
            return

        if val is None and not allow_none:
            raise DataSchemaException("NoneType is not allowed at {x}".format(x=path))

        # first see if we need to coerce the value (and don't coerce None)
        if coerce is not None and val is not None:
            val = self._coerce(val, coerce, accept_failure=allow_coerce_failure)

        if allowed_values is not None and val not in allowed_values:
            raise DataSchemaException("Value {x} is not permitted at {y}".format(x=val, y=path))

        if allowed_range is not None:
            lower, upper = allowed_range
            if (lower is not None and val < lower) or (upper is not None and val > upper):
                raise DataSchemaException("Value {x} is outside the allowed range: {l} - {u}".format(x=val, l=lower, u=upper))

        # now set it at the path point in the object
        self._set_path(path, val)

    def _set_list(self, path, val, coerce=None, allow_coerce_failure=False, allow_none=True, ignore_none=False):
        """
        Set a list at ``path``; a non-list value is wrapped in a list first.

        :raises DataSchemaException: if a None entry or an empty list is supplied where not permitted
        """
        # first ensure that the value is a list
        if not isinstance(val, list):
            val = [val]

        # now carry out the None check
        # for each supplied value, if it is none, and none is not allowed, raise an error if we do not
        # plan to ignore the nones.
        for v in val:
            if v is None and not allow_none:
                if not ignore_none:
                    raise DataSchemaException("NoneType is not allowed at {x}".format(x=path))

        # now coerce each of the values, stripping out Nones if necessary
        val = [self._coerce(v, coerce, accept_failure=allow_coerce_failure) for v in val if v is not None or not ignore_none]

        # check that the cleaned array isn't empty, and if it is behave appropriately
        if len(val) == 0:
            # this is equivalent to a None, so we need to decide what to do
            if ignore_none:
                # if we are ignoring nones, just do nothing
                return
            elif not allow_none:
                # if we are not ignoring nones, and not allowing them, raise an error
                raise DataSchemaException("Empty array not permitted at {x}".format(x=path))

        # now set it on the path
        self._set_path(path, val)

    def _add_to_list(self, path, val, coerce=None, allow_coerce_failure=False, allow_none=False, allowed_values=None, ignore_none=True, unique=False):
        """
        Append ``val`` to the list at ``path`` (creating the list if necessary).

        :param unique: if True, the value is not appended when already present
        :raises DataSchemaException: if None is not allowed or the value is not in ``allowed_values``
        """
        if val is None and ignore_none:
            return

        if val is None and not allow_none:
            raise DataSchemaException("NoneType is not allowed in list at {x}".format(x=path))

        if allowed_values is not None and val not in allowed_values:
            raise DataSchemaException("Value {x} is not permitted at {y}".format(x=val, y=path))

        # first coerce the value
        if coerce is not None:
            val = self._coerce(val, coerce, accept_failure=allow_coerce_failure)
        current = self._get_list(path, by_reference=True)

        # if we require the list to be unique, check for the value first
        if unique:
            if val in current:
                return

        # otherwise, append
        current.append(val)

    def _set_with_struct(self, path, val):
        """Set ``val`` at ``path`` according to the struct entry (field, list or object) for the path."""
        type_, struct, instructions = construct_lookup(path, self._struct)
        if type_ == "field":
            kwargs = construct_kwargs(type_, "set", instructions)
            coerce_fn = self._coerce_map.get(instructions.get("coerce", "unicode"))
            self._set_single(path, val, coerce=coerce_fn, **kwargs)
        elif type_ == "list":
            if not isinstance(val, list):
                val = [val]
            if struct is not None:
                val = [construct(x, struct, self._coerce_map) for x in val]
            kwargs = construct_kwargs(type_, "set", instructions)
            coerce_fn = self._coerce_map.get(instructions.get("coerce"))
            self._set_list(path, val, coerce=coerce_fn, **kwargs)
        elif type_ == "object":
            if struct is not None:
                val = construct(val, struct, self._coerce_map)
            self._set_single(path, val)

    def _add_to_list_with_struct(self, path, val):
        """
        Append ``val`` to the list at ``path``, applying the struct entry for that list.

        :raises DataStructureException: if the struct does not define ``path`` as a list
        """
        type_, struct, instructions = construct_lookup(path, self._struct)
        if type_ != "list":
            raise DataStructureException("Attempt to add to list {x} failed - it is not a list element".format(x=path))
        if struct is not None:
            val = construct(val, struct, self._coerce_map)
        kwargs = construct_kwargs(type_, "set", instructions)
        self._add_to_list(path, val, **kwargs)

    def _utf8_unicode(self):
        """
        DEPRECATED - use dataobj.to_unicode() instead
        """
        return to_unicode()

    def _int(self):
        """
        DEPRECATED - use dataobj.to_int() instead
        """
        return to_int()

    def _float(self):
        """
        DEPRECATED - use dataobj.to_float() instead
        """
        return to_float()

    def _date_str(self, in_format=None, out_format=None):
        """
        DEPRECATED - use dataobj.date_str instead
        """
        return date_str(in_format=in_format, out_format=out_format)
887############################################################
888## Primitive object schema validation
class ObjectSchemaValidationError(DataObjException):
    """
    Raised by the deprecated ``validate`` function when an object does not match its schema.
    """
def validate(obj, schema):
    """
    DEPRECATED - use 'construct' instead.

    Check a dict against a primitive schema which lists the permitted keys under "bools",
    "fields", "lists" and "objects", with optional nested schemas under "list_entries" and
    "object_entries".  Emits a DeprecationWarning on every call.

    :param obj: the dict to check
    :param schema: the schema dict to check it against
    :return: None
    :raises ObjectSchemaValidationError: on the first key or value which breaks the schema
    """
    warnings.warn("DEPRECATED - use 'construct' instead.", DeprecationWarning)

    bool_keys = schema.get("bools", [])
    field_keys = schema.get("fields", [])
    list_keys = schema.get("lists", [])
    object_keys = schema.get("objects", [])

    # all fields
    allowed = bool_keys + field_keys + list_keys + object_keys

    # exact types (not subclasses) which count as plain field values
    primitives = (str, int, float)

    for key, value in obj.items():
        # is the key allowed at all
        if key not in allowed:
            raise ObjectSchemaValidationError("object contains key " + key + " which is not permitted by schema")

        # check the bools are bools
        if key in bool_keys and type(value) is not bool:
            raise ObjectSchemaValidationError("object contains " + key + " = " + str(value) + " but expected boolean")

        # check that the fields are plain old strings or numbers
        if key in field_keys and type(value) not in primitives:
            raise ObjectSchemaValidationError("object contains " + key + " = " + str(value) + " but expected string, unicode or a number")

        # check that the lists are really lists, then check each member
        if key in list_keys:
            if type(value) is not list:
                raise ObjectSchemaValidationError("object contains " + key + " = " + str(value) + " but expected list")
            entry_schema = schema.get("list_entries", {}).get(key)
            for entry in value:
                if entry_schema is not None:
                    # validate each entry against the schema
                    validate(entry, entry_schema)
                elif type(entry) not in primitives:
                    # with no entry schema, the entries must be plain fields
                    raise ObjectSchemaValidationError("list in object contains " + str(type(entry)) + " but expected string, unicode or a number in " + key)

        # check that the objects are objects, then validate if a schema is imposed
        if key in object_keys:
            if type(value) is not dict:
                raise ObjectSchemaValidationError("object contains " + key + " = " + str(value) + " but expected object/dict")
            object_schema = schema.get("object_entries", {}).get(key)
            if object_schema is not None:
                validate(value, object_schema)
951############################################################
952## Data structure coercion
class DataStructureException(DataObjException):
    """
    Raised when data does not fit the struct, e.g. adding to a path which is not a list.
    """
class ConstructException(DataObjException):
    """
    Raised by ``construct_validate`` when a struct definition is itself malformed.
    """
class ScriptTagFoundException(DataObjException):
    """
    Exception to raise when a script tag has been found in metadata.
    """
def construct_validate(struct, context=""):
    """
    Verify that a struct definition is well formed.

    A struct definition has the following shape (every key is optional):

    {
        "fields" : {
            "field_name" : {"coerce" : "coerce_function", **kwargs}
        },
        "objects" : [
            "field_name"
        ],
        "lists" : {
            "field_name" : {"contains" : "object|field", "coerce" : "field_coerce_function", **kwargs}
        },
        "required" : ["field_name"],
        "structs" : {
            "field_name" : {
                <construct>
            }
        }
    }

    :param struct: the struct definition to check
    :param context: dotted path of this struct within its parent; the empty string means the root
    :return: True if the definition is valid
    :raises ConstructException: on the first problem found, naming the offending location
    """
    # location reported in every error raised at this level of the definition
    where = context if context != "" else "root"

    # nothing but the five known top-level keys may be present
    permitted = ["fields", "objects", "lists", "required", "structs"]
    for key in struct.keys():
        if key not in permitted:
            raise ConstructException("Key '{x}' present in struct at '{y}', but is not permitted".format(x=key, y=where))

    # each field must name a coerce function, and carry only string/list arguments
    for field_name, instructions in struct.get("fields", {}).items():
        if "coerce" not in instructions:
            raise ConstructException("Coerce function not listed in field '{x}' at '{y}'".format(x=field_name, y=where))
        for arg, value in instructions.items():
            if not isinstance(value, (list, str)):
                raise ConstructException("Argument '{a}' in field '{b}' at '{c}' is not a string or list".format(a=arg, b=field_name, c=where))

    # object names must all be strings
    for name in struct.get("objects", []):
        if not isinstance(name, str):
            raise ConstructException("There is a non-string value in the object list at '{y}'".format(y=where))

    # each list must declare what it contains, and carry only string/list arguments
    for field_name, instructions in struct.get("lists", {}).items():
        contains = instructions.get("contains")
        if contains is None:
            raise ConstructException("No 'contains' argument in list definition for field '{x}' at '{y}'".format(x=field_name, y=where))
        if contains not in ["object", "field"]:
            raise ConstructException("'contains' argument in list '{x}' at '{y}' contains illegal value '{z}'".format(x=field_name, y=where, z=contains))
        for arg, value in instructions.items():
            if not isinstance(value, (list, str)):
                raise ConstructException("Argument '{a}' in list '{b}' at '{c}' is not a string or list".format(a=arg, b=field_name, c=where))

    # required field names must all be strings
    for name in struct.get("required", []):
        if not isinstance(name, str):
            raise ConstructException("There is a non-string value in the required list at '{y}'".format(y=where))

    # a sub-struct may only be supplied for something declared as an object or a list
    substructs = struct.get("structs", {})
    declared = struct.get("objects", []) + list(struct.get("lists", {}).keys())
    for name in substructs:
        if name not in declared:
            raise ConstructException("struct contains key '{a}' which is not listed in object or list definitions at '{x}'".format(a=name, x=where))

    # finally validate every sub-struct, extending the dotted context as we descend
    for name, substruct in substructs.items():
        nested_context = name if context == "" else context + "." + name
        construct_validate(substruct, context=nested_context)

    return True
def construct(obj, struct, coerce, context="", silent_prune=False, maintain_reference=False):
    """
    Build a new data structure from ``obj`` according to the struct definition,
    coercing field values and rejecting anything the definition does not allow.

    The struct definition has the following shape:

    {
        "fields" : {
            "field_name" : {"coerce" : "coerce_function", **kwargs}
        },
        "objects" : [
            "field_name"
        ],
        "lists" : {
            "field_name" : {"contains" : "object|field", "coerce" : "field_coerce_function", **kwargs}
        },
        "required" : ["field_name"],
        "structs" : {
            "field_name" : {
                <construct>
            }
        }
    }

    :param obj: the source data (expected to behave like a dict); None is passed straight through
    :param struct: the struct definition describing the permitted shape of ``obj``
    :param coerce: mapping of coerce-function name to the callable that implements it
    :param context: path prefix used in error messages; the empty string means the root
    :param silent_prune: if True, keys of ``obj`` not mentioned in the struct are dropped instead
        of raising (fields are copied explicitly, so this only switches off the check)
    :param maintain_reference: if True, ``obj`` is cleared and refilled with the constructed data
        and ``obj`` itself is returned.  This is not propagated to the recursive calls for
        nested objects
    :return: None if ``obj`` is None; ``obj`` if ``maintain_reference`` is set; otherwise the
        ``data`` attribute of the DataObj that was built
    :raises DataStructureException: if ``obj`` does not conform to ``struct``, or a required
        coerce function is not available
    """
    if obj is None:
        return None

    # location reported in the structural error messages raised at this level
    where = context if context != "" else "root"

    # anything that cannot give us its keys is not an object.  Only ordinary errors are
    # translated - KeyboardInterrupt/SystemExit must be allowed to propagate untouched
    try:
        keys = list(obj.keys())
    except Exception as e:
        raise DataStructureException("Expected an object at {c} but found something else instead".format(c=where)) from e

    # check that all the required fields are there
    for r in struct.get("required", []):
        if r not in keys:
            raise DataStructureException("Field '{r}' is required but not present at '{c}'".format(r=r, c=where))

    # check that there are no fields that are not allowed
    # Note that since the construct mechanism copies fields explicitly, silent_prune literally just turns off this
    # check
    if not silent_prune:
        allowed = list(struct.get("fields", {}).keys()) + struct.get("objects", []) + list(struct.get("lists", {}).keys())
        for k in keys:
            if k not in allowed:
                raise DataStructureException("Field '{k}' is not permitted at '{c}'".format(k=k, c=where))

    # this is the new object we'll be creating from the old
    constructed = DataObj()

    # now check all the fields
    for field_name, instructions in struct.get("fields", {}).items():
        val = obj.get(field_name)
        if val is None:
            continue
        coerce_name = instructions.get("coerce", "unicode")
        coerce_fn = coerce.get(coerce_name)
        if coerce_fn is None:
            raise DataStructureException("No coersion function defined for type '{x}' at '{c}'".format(x=coerce_name, c=context + field_name))

        kwargs = construct_kwargs("field", "set", instructions)

        try:
            constructed._set_single(field_name, val, coerce=coerce_fn, **kwargs)
        except DataSchemaException as e:
            raise DataStructureException("Schema exception at '{a}', {b}".format(a=context + field_name, b=str(e))) from e

    # next check all the objects (which will involve a recursive call to this function)
    for field_name in struct.get("objects", []):
        val = obj.get(field_name)
        if val is None:
            continue
        # exact type check: anything that is not a plain dict (subclasses included) is rejected
        if type(val) is not dict:
            raise DataStructureException("Found '{x}' = '{y}' but expected object/dict".format(x=context + field_name, y=val))

        instructions = struct.get("structs", {}).get(field_name)

        if instructions is None:
            # this is the lowest point at which we have instructions, so just accept the data structure as-is
            # (taking a deep copy to destroy any references)
            stored = deepcopy(val)
        else:
            # we need to recurse further down; what we get back is the correct sub-data structure
            stored = construct(val, instructions, coerce=coerce, context=context + field_name + ".", silent_prune=silent_prune)

        try:
            constructed._set_single(field_name, stored)
        except DataSchemaException as e:
            raise DataStructureException(str(e)) from e

    # now check all the lists
    for field_name, instructions in struct.get("lists", {}).items():
        vals = obj.get(field_name)
        if vals is None:
            continue
        if not isinstance(vals, list):
            raise DataStructureException("Expecting list at {x} but found something else".format(x=context + field_name))

        # prep the keyword arguments for the setters
        kwargs = construct_kwargs("list", "set", instructions)

        contains = instructions.get("contains")
        if contains == "field":
            # coerce all the values in the list
            coerce_name = instructions.get("coerce", "unicode")
            coerce_fn = coerce.get(coerce_name)
            if coerce_fn is None:
                raise DataStructureException("No coersion function defined for type '{x}' at '{c}'".format(x=coerce_name, c=context + field_name))

            for val in vals:
                try:
                    constructed._add_to_list(field_name, val, coerce=coerce_fn, **kwargs)
                except DataSchemaException as e:
                    raise DataStructureException(str(e)) from e

        elif contains == "object":
            # the sub-struct (if any) is the same for every element of the list
            subinst = struct.get("structs", {}).get(field_name)

            # for each object in the list, send it for construction
            for i, val in enumerate(vals):
                if type(val) is not dict:
                    raise DataStructureException("Found '{x}[{p}]' = '{y}' but expected object/dict".format(x=context + field_name, y=val, p=i))

                if subinst is None:
                    # no further instructions: accept a deep copy of the element as-is
                    stored = deepcopy(val)
                else:
                    # we need to recurse further down; what we get back is the correct sub-data structure
                    stored = construct(val, subinst, coerce=coerce, context=context + field_name + "[" + str(i) + "].", silent_prune=silent_prune)

                try:
                    constructed._add_to_list(field_name, stored)
                except DataSchemaException as e:
                    raise DataStructureException(str(e)) from e

        else:
            raise DataStructureException("Cannot understand structure where list '{x}' elements contain '{y}'".format(x=context + field_name, y=contains))

    if maintain_reference:
        # rewrite the caller's object in place so that existing references see the new data
        obj.clear()
        obj.update(constructed.data)
        return obj

    return constructed.data
def construct_merge(target, source):
    """
    Merge two struct definitions into a new one, with ``target`` taking precedence.

    Entries that exist only in ``source`` are added; entries present in both keep the
    ``target`` version, except for "structs", which are merged recursively.  Neither
    argument is modified.

    :param target: the struct definition whose entries win on conflict
    :param source: the struct definition supplying any missing entries
    :return: a new, merged struct definition
    """
    result = deepcopy(target)

    # field definitions missing from the target
    for name, instructions in source.get("fields", {}).items():
        fields = result.setdefault("fields", {})
        if name not in fields:
            fields[name] = deepcopy(instructions)

    # object names missing from the target
    for name in source.get("objects", []):
        objects = result.setdefault("objects", [])
        if name not in objects:
            objects.append(name)

    # list definitions missing from the target
    for name, instructions in source.get("lists", {}).items():
        lists = result.setdefault("lists", {})
        if name not in lists:
            lists[name] = deepcopy(instructions)

    # required field names missing from the target
    for name in source.get("required", []):
        required = result.setdefault("required", [])
        if name not in required:
            required.append(name)

    # sub-structs: copy the new ones, recursively merge those both sides define
    for name, substruct in source.get("structs", {}).items():
        structs = result.setdefault("structs", {})
        if name in structs:
            structs[name] = construct_merge(structs[name], substruct)
        else:
            structs[name] = deepcopy(substruct)

    return result
def construct_lookup(path, struct):
    """
    Find what a struct definition says about the element at a dot-separated path.

    :param path: dot-separated path into the struct, e.g. "bibjson.title"
    :param struct: the struct definition to search
    :return: a 3-tuple ``(kind, substruct, instructions)``:
        ("field", None, instructions) for a field,
        ("list", substruct-or-None, instructions) for a list,
        ("object", substruct-or-None, None) for an object,
        (None, None, None) if the path cannot be resolved
    """
    head, separator, remainder = path.partition(".")

    if separator:
        # more than one path segment: the first one has to be an object for the
        # path to continue beneath it
        if head not in struct.get("objects", []):
            return None, None, None

        substruct = struct.get("structs", {}).get(head)
        if substruct is None:
            # the object is declared but has no struct of its own, so nothing is
            # known about anything beneath it
            return None, None, None

        return construct_lookup(remainder, substruct)

    # single segment: first check the fields
    instructions = struct.get("fields", {}).get(head)
    if instructions is not None:
        return "field", None, instructions

    # then check the lists
    instructions = struct.get("lists", {}).get(head)
    if instructions is not None:
        return "list", struct.get("structs", {}).get(head), instructions

    # then check the objects
    if head in struct.get("objects", []):
        return "object", struct.get("structs", {}).get(head), None

    return None, None, None
def construct_kwargs(type, dir, instructions):
    """
    Derive the keyword arguments for a getter or setter from a field/list definition.

    The structural keys ("coerce" for fields; "coerce" and "contains" for lists) are
    removed.  Of what remains, keys prefixed "get__" apply only to getters and every
    other key applies to setters, with any "set__"/"get__" prefix stripped.

    :param type: "field" or "list" - which structural keys to discard
    :param dir: "set" or "get" - which direction the arguments are wanted for
    :param instructions: the definition dict from the struct, or None
    :return: a new dict of keyword arguments (empty if there are none)
    """
    # no instructions means no keyword arguments
    if instructions is None:
        return {}

    # work on a copy so the struct definition itself is never touched
    remaining = deepcopy(instructions)

    # discard the keys that describe the structure rather than the behaviour
    if type == "field":
        remaining.pop("coerce", None)
    elif type == "list":
        remaining.pop("coerce", None)
        remaining.pop("contains", None)

    if dir == "set":
        # everything is a "set" argument unless explicitly marked as a "get" argument
        return {
            (key[5:] if key.startswith("set__") else key): value
            for key, value in remaining.items()
            if not key.startswith("get__")
        }

    if dir == "get":
        # only explicitly marked arguments apply to getters
        return {
            key[5:]: value
            for key, value in remaining.items()
            if key.startswith("get__")
        }

    return {}
def construct_data_keys(struct):
    """
    List every data key a struct definition declares at its top level.

    :param struct: the struct definition
    :return: field names, then object names, then list names, as a single list
    """
    names = list(struct.get("fields", {}))
    names.extend(struct.get("objects", []))
    names.extend(struct.get("lists", {}))
    return names
def merge_outside_construct(struct, target, source):
    """
    Copy into ``target`` whatever ``source`` holds that the struct does not govern.

    Keys the struct declares as fields or lists are never taken from ``source``.  Keys
    it declares as objects are descended into, using the matching sub-struct (or an
    empty one).  Any other key is deep-copied across unless ``target`` already has it.
    Neither ``target`` nor ``source`` is modified.

    :param struct: the struct definition describing the governed part of the data
    :param target: the data to merge into
    :param source: the data to take ungoverned values from
    :return: a new, merged data structure
    """
    result = deepcopy(target)

    for key, incoming in source.items():
        if key in struct.get("fields", {}) or key in struct.get("lists", {}):
            # governed by the struct as a field or a list: leave the target's view alone
            continue
        elif key in struct.get("objects", []):
            # governed object: the ungoverned data, if any, lives further down
            result[key] = merge_outside_construct(
                struct.get("structs", {}).get(key, {}),
                target.get(key, {}),
                incoming,
            )
        elif key not in result:
            # not represented at this level of the struct and not already set on the
            # target, so carry it over in full
            result[key] = deepcopy(incoming)

    return result
1344############################################################
1345## Unit test support
def test_dataobj(obj, fields_and_values):
    """
    Test a dataobj to make sure that the getters and setters you have specified
    are working correctly.

    Provide it a data object and a dict of fields with the values to set and the expected return values (if required):

    {
        "key" : ("set value", "get value")
    }

    If you provide only the set value, then the get value will be required to be the same as the set value in the test

    {
        "key" : "set value"
    }

    All the values are set first, and only then are they all read back and compared.

    :param obj: the object whose attributes are to be exercised
    :param fields_and_values: dict of attribute name to set value, or to a (set value, get value) tuple
    :return: None
    :raises AssertionError: if an attribute cannot be set, or reads back as something other than expected
    """
    # failures are raised explicitly rather than with "assert" so that the checks
    # still run when Python is started with -O
    for k, valtup in fields_and_values.items():
        set_val = valtup[0] if isinstance(valtup, tuple) else valtup
        try:
            setattr(obj, k, set_val)
        except AttributeError:
            raise AssertionError("Unable to set attribute {x} with value {y}".format(x=k, y=set_val))

    for k, valtup in fields_and_values.items():
        if not isinstance(valtup, tuple):
            valtup = (valtup,)
        # the expected value defaults to the value that was set
        get_val = valtup[1] if len(valtup) > 1 else valtup[0]
        val = getattr(obj, k)
        if not val == get_val:
            raise AssertionError((k, val, get_val))


# This is a helper for other tests, not a test in its own right: stop pytest from
# collecting it (and failing on its arguments) wherever it is imported into a test module
test_dataobj.__test__ = False