Coverage for portality/lib/dataobj.py: 48%

816 statements  

« prev     ^ index     » next       coverage.py v6.4.2, created at 2022-07-20 16:12 +0100

1# -*- coding: UTF-8 -*- 

2 

3from portality.lib import dates 

4from portality.datasets import get_country_code, get_currency_code 

5from copy import deepcopy 

6import locale, json, warnings 

7from urllib.parse import urlparse 

8from datetime import date, datetime 

9 

10######################################################### 

11## Data coerce functions 

12 

def to_currency_code(val):
    """
    Coerce a value to a currency code.

    ``None`` is passed straight through.  Any other value is looked up with
    ``get_currency_code`` and the result is returned as a string.

    :param val: the value to convert
    :return: the currency code as a string, or None if ``val`` is None
    :raises ValueError: if ``get_currency_code`` returns None for ``val``
    """
    if val is None:
        return None

    code = get_currency_code(val)
    if code is not None:
        return to_unicode()(code)

    raise ValueError("Unable to convert {x} to a valid currency code".format(x=val))

21 

def to_country_code(val):
    """
    Coerce a value to a country code.

    ``None`` is passed straight through.  Any other value is looked up with
    ``get_country_code`` (called with ``fail_if_not_found=True``) and the
    result is returned as a string.

    :param val: the value to convert
    :return: the country code as a string, or None if ``val`` is None
    :raises ValueError: if ``get_country_code`` returns None for ``val``
    """
    if val is None:
        return None

    code = get_country_code(val, fail_if_not_found=True)
    if code is not None:
        return to_unicode()(code)

    raise ValueError("Unable to convert {x} to a valid country code".format(x=val))

30 

def to_unicode():
    """
    Build a coerce function which converts its argument to a ``str``.

    :return: a one-argument function: ``str`` values are returned unchanged,
        ``bytes`` values are decoded strictly as UTF-8, and anything else is
        passed through ``str()``.  The returned function raises ValueError if
        a ``bytes`` value is not valid UTF-8.
    """
    def to_utf8_unicode(val):
        if isinstance(val, str):
            return val
        # The second branch previously re-tested ``str`` and could never run,
        # so bytes fell through to str() and came back as "b'...'".
        if isinstance(val, bytes):
            try:
                return val.decode("utf8", "strict")
            except UnicodeDecodeError:
                raise ValueError("Could not decode string")
        return str(val)

    return to_utf8_unicode

44 

45 

def to_unicode_upper(val):
    """
    Coerce a value to a string with ``to_unicode()`` and upper-case it.

    :param val: the value to convert
    :return: the upper-cased string form of ``val``
    """
    return to_unicode()(val).upper()

50 

def to_int():
    """
    Build a coerce function which converts its argument to an ``int``.

    :return: a one-argument function.  It tries, in order: a plain ``int()``
        cast, a cast with commas removed, and ``locale.atoi``.  It raises
        ValueError if none of them succeeds.
    """
    def intify(val):
        # Strip any characters that are outside the ascii range - they won't
        # make up the int anyway, and this gets rid of things like strange
        # currency marks.  Decode straight back to str: leaving the value as
        # bytes made the comma-stripping and locale steps below raise an
        # uncaught TypeError.
        if isinstance(val, str):
            val = val.encode("ascii", errors="ignore").decode("ascii")

        # try the straight cast
        try:
            return int(val)
        except ValueError:
            pass

        # the remaining strategies are string operations
        if not isinstance(val, str):
            raise ValueError("Could not convert string to int: {x}".format(x=val))

        # could have commas in it, so try stripping them
        try:
            return int(val.replace(",", ""))
        except ValueError:
            pass

        # try the locale-specific approach
        try:
            return locale.atoi(val)
        except ValueError:
            pass

        raise ValueError("Could not convert string to int: {x}".format(x=val))

    return intify

79 

def to_float():
    """
    Build a coerce function which converts its argument to a ``float``.

    :return: a one-argument function.  It tries, in order: a plain
        ``float()`` cast, a cast with commas removed, and ``locale.atof``.
        It raises ValueError if none of them succeeds.
    """
    def floatify(val):
        # Strip any characters that are outside the ascii range - they won't
        # make up the float anyway, and this gets rid of things like strange
        # currency marks.  Decode straight back to str: leaving the value as
        # bytes made the comma-stripping and locale steps below raise an
        # uncaught TypeError.
        if isinstance(val, str):
            val = val.encode("ascii", errors="ignore").decode("ascii")

        # try the straight cast
        try:
            return float(val)
        except ValueError:
            pass

        # the remaining strategies are string operations
        if not isinstance(val, str):
            raise ValueError("Could not convert string to float: {x}".format(x=val))

        # could have commas in it, so try stripping them
        try:
            return float(val.replace(",", ""))
        except ValueError:
            pass

        # try the locale-specific approach
        try:
            return locale.atof(val)
        except ValueError:
            pass

        raise ValueError("Could not convert string to float: {x}".format(x=val))

    return floatify

108 

def date_str(in_format=None, out_format=None):
    """
    Build a coerce function which renders dates as strings.

    :param in_format: passed to ``dates.reformat`` as ``in_format`` when the
        value being coerced is not a date/datetime object
    :param out_format: passed to ``dates.format`` / ``dates.reformat`` as the
        output format
    :return: a one-argument function which returns None for None or the empty
        string, and otherwise the result of the relevant ``dates`` call
    """
    def datify(val):
        if val is None or val == "":
            return None

        # date/datetime objects only need formatting; anything else is handed
        # to dates.reformat along with the input format
        if isinstance(val, (date, datetime)):
            return dates.format(val, format=out_format)
        return dates.reformat(val, in_format=in_format, out_format=out_format)

    return datify

119 

def to_datestamp(in_format=None):
    """
    Build a coerce function which parses values with ``dates.parse``.

    :param in_format: passed to ``dates.parse`` as ``format``
    :return: a one-argument function returning whatever ``dates.parse`` returns
    """
    def stampify(val):
        parsed = dates.parse(val, format=in_format)
        return parsed

    return stampify

125 

def to_isolang(output_format=None):
    """
    Build a coerce function which converts a language identifier into the
    requested representation.

    :param output_format: format from input source to output.  Must be one of:
        * alpha3
        * alt3
        * alpha2
        * name
        * fr
    Can be a list in order of preference, too.  Defaults to ``["alpha3"]``.
    fixme: we could make these pycountry's keys, removing the need for so many transformations and intermediate steps
    :return: a one-argument function.  It returns None for None, raises
        ValueError if the language cannot be found in the dataset, and
        otherwise returns the first requested format with a non-empty value
        (None if every requested format is empty).
    """
    # delayed import, since we may not always want to load the whole dataset for a dataobj
    from portality.lib import isolang as dataset

    # normalise the requested formats into a list, in order of preference
    if output_format is None:
        formats = ["alpha3"]
    elif isinstance(output_format, list):
        formats = output_format
    else:
        formats = [output_format]

    def isolang(val):
        if val is None:
            return None

        record = dataset.find(val)
        if record is None:
            raise ValueError("Unable to find iso code for language {x}".format(x=val))

        for fmt in formats:
            candidate = record.get(fmt)
            if not (candidate is None or candidate == ""):
                return candidate

        # none of the requested formats had a usable value
        return None

    return isolang

160 

def to_url(val):
    """
    Coerce a value to a URL string.

    :param val: the value to check; must be a string
    :return: the value with surrounding whitespace stripped.  A string which
        is empty after stripping is returned as the empty string.
    :raises ValueError: if ``val`` is not a string, or if its parsed scheme is
        empty or does not start with "http"
    """
    if not isinstance(val, str):
        raise ValueError("Argument passed to to_url was not a string, but type '{t}': '{val}'".format(t=type(val),val=val))

    stripped = val.strip()
    if stripped == '':
        return stripped

    # parse with urlparse, then check the url has the minimum properties that we require
    scheme = urlparse(stripped).scheme
    if not (scheme and scheme.startswith("http")):
        raise ValueError("Could not convert string {val} to viable URL".format(val=stripped))

    return to_unicode()(stripped)

179 

def to_bool(val):
    """
    Conservative boolean cast - don't cast lists and objects to True, just existing booleans and strings.

    :param val: None, a boolean, or a string saying 'true' or 'false' (not case-sensitive)
    :return: None for None, otherwise the corresponding boolean
    :raises ValueError: for any other string, and for any other type
    """
    if val is None:
        return None
    if val is True or val is False:
        return val

    if not isinstance(val, str):
        raise ValueError("Could not convert {val} to boolean. Expect either boolean or string.".format(val=val))

    lowered = val.lower()
    if lowered == 'true':
        return True
    if lowered == 'false':
        return False
    raise ValueError("Could not convert string {val} to boolean. Expecting string to either say 'true' or 'false' (not case-sensitive).".format(val=val))

195 

def string_canonicalise(canon, allow_fail=False):
    """
    Build a coerce function which maps strings onto a canonical spelling.

    Matching ignores case and surrounding whitespace.

    :param canon: iterable of the canonical strings
    :param allow_fail: if True, None is returned as None and unrecognised
        strings are returned as-is (coerced to a string) instead of raising
    :return: a one-argument function.  Unless ``allow_fail`` is set, it raises
        ValueError for None and for strings not in ``canon``.  It always raises
        ValueError for values that cannot be stripped and lower-cased.
    """
    normalised = {entry.strip().lower(): entry for entry in canon}

    def sn(val):
        if val is None:
            if allow_fail:
                return None
            raise ValueError("NoneType not permitted")

        # Anything without string methods cannot be canonicalised.  Catch
        # Exception rather than using a bare except, so KeyboardInterrupt and
        # SystemExit are not converted into a ValueError.
        try:
            norm = val.strip().lower()
        except Exception:
            raise ValueError("Unable to treat value as a string")

        uc = to_unicode()
        if norm in normalised:
            return uc(normalised[norm])
        if allow_fail:
            return uc(val)

        raise ValueError("Unable to canonicalise string")

    return sn

221 

222############################################################ 

223 

224############################################################ 

225# The core data object which manages all the interactions 

226# with the underlying data member variable 

227 

228 

class DataObjException(Exception):
    """
    Base exception for this module.

    The first positional argument, if any, is also stored on ``self.message``;
    with no arguments ``message`` is the empty string.
    """

    def __init__(self, *args, **kwargs):
        self.message = args[0] if args else ''
        super(DataObjException, self).__init__(*args, **kwargs)

236 

237 

class DataSchemaException(DataObjException):
    """Raised by DataObj when a value is not acceptable at the requested path."""

240 

241 

class DataObj(object):
    """
    Class which provides services to other classes which store their internal data
    as a python data structure in the self.data field.
    """

    # schema checked by the deprecated validate() method; None disables the check
    SCHEMA = None

    # named coerce functions; copied onto each instance by __init__ when no
    # coerce map has been supplied
    DEFAULT_COERCE = {
        # NOTE - if you add something to the default coerce, add it to the default swagger
        # translation dict below as well. Furthermore if you're adding
        # custom stuff to the coerce, you will likely need to add an entry
        # to the swagger translation table as well, in the same way you
        # extend the coerce map.
        "unicode": to_unicode(),
        "unicode_upper": to_unicode_upper,
        "utcdatetime": date_str(),
        "utcdatetimemicros": date_str(out_format="%Y-%m-%dT%H:%M:%S.%fZ"),
        "bigenddate": date_str(out_format="%Y-%m-%d"),
        "integer": to_int(),
        "float": to_float(),
        "isolang": to_isolang(),
        "url": to_url,
        "bool": to_bool,
        "isolang_2letter": to_isolang(output_format="alpha2"),
        "country_code": to_country_code,
        "currency_code": to_currency_code,
        "license": string_canonicalise(
            ["CC BY", "CC BY-NC", "CC BY-NC-ND", "CC BY-NC-SA", "CC BY-ND", "CC BY-SA", "Not CC-like"],
            allow_fail=True,
        ),
        "persistent_identifier_scheme": string_canonicalise(
            ["None", "DOI", "Handles", "ARK"],
            allow_fail=True,
        ),
        "format": string_canonicalise(
            ["PDF", "HTML", "ePUB", "XML"],
            allow_fail=True,
        ),
        "deposit_policy": string_canonicalise(
            ["None", "Sherpa/Romeo", "Dulcinea", "OAKlist", "Diadorim"],
            allow_fail=True,
        ),
    }

274 

def __init__(self, raw=None, struct=None, construct_raw=True, expose_data=False, properties=None, coerce_map=None, construct_silent_prune=False, construct_maintain_reference=False, *args, **kwargs):
    """
    Initialise the object.

    Each internal attribute is only assigned if it is not already present
    (for example because a subclass set it before calling this constructor).

    :param raw: the raw data to hold; an empty dict is used if None
    :param struct: the struct describing the data, if any
    :param construct_raw: whether to run ``construct`` over the data when both
        a struct and ``raw`` are present
    :param expose_data: whether the keys of the data are exposed as dynamic
        attributes
    :param properties: mapping of the form
        {"<public property name>" : ("<path.to.property>", "<data object wrapper>")}
        e.g. {"identifier" : ("bibjson.identifier", DataObj)}
    :param coerce_map: mapping of coerce names to functions; a deep copy of
        DEFAULT_COERCE is used if None
    :param construct_silent_prune: passed to ``construct`` as ``silent_prune``
    :param construct_maintain_reference: passed to ``construct`` as
        ``maintain_reference``
    :param args: passed up to the next ``__init__`` in the inheritance chain
    :param kwargs: passed up to the next ``__init__`` in the inheritance chain
    """
    # make a shortcut to the object.__getattribute__ function, which bypasses
    # this class's dynamic __getattr__
    og = object.__getattribute__

    def _unset(name):
        # True if the attribute cannot currently be read from this object.
        # Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt/SystemExit are never mistaken for a missing attribute.
        try:
            og(self, name)
        except Exception:
            return True
        return False

    # if no subclass has set the coerce, then set it from default
    if _unset("_coerce_map"):
        self._coerce_map = coerce_map if coerce_map is not None else deepcopy(self.DEFAULT_COERCE)

    # if no subclass has set the struct, initialise it
    if _unset("_struct"):
        self._struct = struct

    # assign the data if not already assigned by subclass
    # NOTE: data is not _data deliberately
    if _unset("data"):
        self.data = {} if raw is None else raw

    # properties to allow automatic object API construction
    if _unset("_properties"):
        self._properties = properties if properties is not None else {}

    # if no subclass has set expose_data, set it
    if _unset("_expose_data"):
        self._expose_data = expose_data

    # if no subclass has set _construct_silent_prune, set it
    if _unset("_construct_silent_prune"):
        self._construct_silent_prune = construct_silent_prune

    # if no subclass has set _construct_maintain_reference, set it
    if _unset("_construct_maintain_reference"):
        self._construct_maintain_reference = construct_maintain_reference

    # restructure the object based on the struct if required
    # NOTE(review): this passes the constructor arguments, not the stored
    # self._construct_* attributes, so values pre-set by a subclass are not
    # used here - confirm whether that is intended
    if self._struct is not None and raw is not None and construct_raw:
        self.data = construct(self.data, self._struct, self._coerce_map, silent_prune=construct_silent_prune, maintain_reference=construct_maintain_reference)

    # run against the old validation routine
    # (now deprecated)
    self.validate()

    # run the object's native validation routine
    self.custom_validate()

    # keep a reference to the current data record, in case something up the inheritance chain messes with it
    # (I'm looking at you, UserDict).
    remember_this = self.data

    # finally, kick the request up
    super(DataObj, self).__init__(*args, **kwargs)
    self.data = remember_this

345 

def __getattr__(self, name):
    """
    Resolve dynamic attributes against the declared properties and,
    if data exposure is enabled, the keys of the internal data.

    :param name: the attribute being requested
    :return: the value from ``_get_internal_property`` for the resolved path
    :raises AttributeError: if the name is not a dynamic property, or the
        property is not set
    """
    # NOTE: a debugger workaround (raise AttributeError for names starting
    # with "__") used to be sketched here but was never enabled, see
    # https://stackoverflow.com/questions/32831050/pycharms-debugger-gives-up-when-hitting-copy-deepcopy

    # anything defined on the class itself is fetched the normal way
    if hasattr(self.__class__, name):
        return object.__getattribute__(self, name)

    props, data_attrs = self._list_dynamic_properties()

    # extract the path from the properties list or the internal data; if the
    # name is in neither, raise an attribute error
    if name in props:
        path, wrapper = self._properties.get(name)
    elif name in data_attrs:
        path, wrapper = name, DataObj
    else:
        raise AttributeError('{name} is not set'.format(name=name))

    # request the internal property directly (which will in-turn raise the AttributeError if necessary)
    try:
        return self._get_internal_property(path, wrapper)
    except AttributeError:
        # re-wrap the attribute error with the name, rather than the path
        raise AttributeError('{name} is not set'.format(name=name))

375 

def __setattr__(self, key, value):
    """
    Set an attribute, routing dynamic properties into the internal data.

    Attributes defined on the class, and the internal bookkeeping attributes,
    are set directly on the object.  Declared properties and exposed data keys
    are written through ``_set_internal_property``.  Anything else, or a
    dynamic property that could not be set internally, falls back to a plain
    attribute on the object.

    :param key: the attribute name
    :param value: the value to set
    """
    plain_set = object.__setattr__

    # first set the attribute on any explicitly defined property
    try:
        if hasattr(self.__class__, key):
            return plain_set(self, key, value)
    except AttributeError:
        pass

    # this could be an internal attribute from the constructor, so we need to make
    # a special case
    if key in ["_coerce_map", "_struct", "data", "_properties", "_expose_data"]:
        return plain_set(self, key, value)

    props, data_attrs = self._list_dynamic_properties()

    # extract the path from the properties list or the internal data
    path, wrapper = None, None
    if key in props:
        path, wrapper = self._properties.get(key)
    elif key in data_attrs:
        path, wrapper = key, DataObj

    # try to set the property on the internal object
    if path is not None and self._set_internal_property(path, value, wrapper):
        return

    # fall back to the default approach of allowing any attribute to be set on the object
    return plain_set(self, key, value)

409 

def check_construct(self):
    """
    Apply the construct to the internal data and throw errors if it is not validated

    This could be used, for example, if external processes have violated the .data encapsulation, or
    if internal processes which change .data need to be checked to make sure they haven't strayed outside
    their remit

    Nothing is checked if there is no struct or no data.  The result of the
    construct call is discarded.

    :return:
    """
    if self._struct is None or self.data is None:
        return
    construct(self.data, self._struct, self._coerce_map, silent_prune=False, maintain_reference=False)

422 

def validate(self):
    """
    DEPRECATED - use 'check_construct' instead.

    Emits a DeprecationWarning, then checks the data against ``SCHEMA`` with
    the module-level ``validate`` function if a schema is set.

    :return: True
    """
    warnings.warn("DEPRECATED - use 'check_construct' instead.", DeprecationWarning)
    schema = self.SCHEMA
    if schema is not None:
        validate(self.data, schema)
    return True

433 

def custom_validate(self):
    """
    Hook called at the end of construction; does nothing here.
    """
    return None

436 

def populate(self, fields_and_values):
    """
    Set each key of the supplied mapping as an attribute on this object.

    :param fields_and_values: mapping of attribute name to value
    """
    for field, value in fields_and_values.items():
        setattr(self, field, value)

440 

def clone(self):
    """
    Create a new instance of this object's class from a deep copy of its data.

    :return: the new instance
    """
    cls = self.__class__
    return cls(deepcopy(self.data))

443 

def json(self):
    """
    Serialise the internal data with ``json.dumps``.

    :return: the data as a JSON string
    """
    serialised = json.dumps(self.data)
    return serialised

446 

def get_struct(self):
    """
    :return: the struct currently held by this object
    """
    struct = self._struct
    return struct

449 

def _get_internal_property(self, path, wrapper=None):
    """
    Read the value at ``path`` in the internal data, using the struct (if any)
    to decide how it is retrieved.

    :param path: dot-separated path into the data
    :param wrapper: optional class used to wrap dicts (and lists of dicts)
    :return: the value, wrapped where applicable
    :raises AttributeError: if the path is not covered by the struct and has
        no value, or the struct entry is of a kind not handled here
    """
    # pull the object from the structure, to find out what kind of retrieve it needs
    # (if there is a struct)
    kind, substruct, instructions = None, None, None
    if self._struct:
        kind, substruct, instructions = construct_lookup(path, self._struct)

    if kind is None:
        # if there is no struct, or no object mapping was found, try to pull the path
        # as a single node (may be a field, list or dict)
        val = self._get_single(path)

        # if this is a dict or a list and a wrapper is supplied, wrap it
        if wrapper is not None:
            if isinstance(val, dict):
                return wrapper(val, expose_data=self._expose_data)
            if isinstance(val, list) and len(val) > 0 and isinstance(val[0], dict):  # just check the first one
                return [wrapper(item, expose_data=self._expose_data) for item in val]

        # otherwise, return the raw value if it is not None, or raise an AttributeError
        if val is None:
            raise AttributeError('{name} is not set'.format(name=path))
        return val

    if instructions is None:
        instructions = {}

    # if the struct contains a reference to the path, always return something, even if it is None - don't raise an AttributeError
    kwargs = construct_kwargs(kind, "get", instructions)
    coerce_fn = self._coerce_map.get(instructions.get("coerce"))
    if coerce_fn is not None:
        kwargs["coerce"] = coerce_fn

    if kind == "field":
        return self._get_single(path, **kwargs)

    if kind == "object":
        raw_obj = self._get_single(path, **kwargs)
        if not wrapper:
            return raw_obj
        # FIXME: this means all substructures are forced to use this classes expose_data policy, whatever it is
        return wrapper(raw_obj, substruct, construct_raw=False, expose_data=self._expose_data)

    if kind == "list":
        contains = instructions.get("contains")
        if contains == "field":
            return self._get_list(path, **kwargs)
        if contains == "object":
            raw_list = self._get_list(path, **kwargs)
            if not wrapper:
                return raw_list
            # FIXME: this means all substructures are forced to use this classes expose_data policy, whatever it is
            return [wrapper(o, substruct, construct_raw=False, expose_data=self._expose_data) for o in raw_list]

    # if for whatever reason we get here, raise the AttributeError
    raise AttributeError('{name} is not set'.format(name=path))

505 

def _set_internal_property(self, path, value, wrapper=None):
    """
    Write ``value`` at ``path`` in the internal data, using the struct (if
    any) to decide how it is stored.

    :param path: dot-separated path into the data
    :param value: the value to set; DataObj instances are stored as their raw data
    :param wrapper: optional class that object values must be, or be
        convertible to
    :return: True if the value was set, False if it was not (the path is
        outside a defined struct, or the struct entry is of a kind not
        handled here)
    :raises AttributeError: if a value is a DataObj of the wrong type, or
        wrapping it raises DataStructureException
    """

    def _wrap_validate(val, wrap, substruct):
        # reduce the value to the raw data to store, checking it against the
        # wrapper class where there is one
        if isinstance(val, DataObj):
            if wrap is None or isinstance(val, wrap):
                return val.data
            raise AttributeError("Attempt to set {x} failed; is not of an allowed type.".format(x=path))

        if wrap is None:
            return val

        try:
            return wrap(val, substruct).data
        except DataStructureException as e:
            raise AttributeError(str(e))

    # pull the object from the structure, to find out what kind of retrieve it needs
    # (if there is a struct)
    kind, substruct, instructions = None, None, None
    if self._struct:
        kind, substruct, instructions = construct_lookup(path, self._struct)

    # if no type is found, then this means that either the struct was undefined, or the
    # path did not point to a valid point in the struct.  In the case that the struct was
    # defined, this means the property is trying to set something outside the struct, which
    # isn't allowed.  So, only set types which are None against objects which don't define
    # the struct.
    if kind is None:
        if self._struct is not None:
            return False

        if isinstance(value, list):
            self._set_list(path, [_wrap_validate(v, wrapper, None) for v in value])
        else:
            self._set_single(path, _wrap_validate(value, wrapper, None))
        return True

    if instructions is None:
        instructions = {}

    kwargs = construct_kwargs(kind, "set", instructions)
    coerce_fn = self._coerce_map.get(instructions.get("coerce"))
    if coerce_fn is not None:
        kwargs["coerce"] = coerce_fn

    if kind == "field":
        self._set_single(path, value, **kwargs)
        return True

    if kind == "object":
        self._set_single(path, _wrap_validate(value, wrapper, substruct), **kwargs)
        return True

    if kind == "list":
        contains = instructions.get("contains")
        if contains == "field":
            self._set_list(path, value, **kwargs)
            return True
        if contains == "object":
            entries = value if isinstance(value, list) else [value]
            self._set_list(path, [_wrap_validate(v, wrapper, substruct) for v in entries], **kwargs)
            return True

    return False

579 

def _list_dynamic_properties(self):
    """
    List the dynamic properties the object could have.

    :return: tuple of (declared property names, exposed data keys).  The data
        keys are only populated when ``_expose_data`` is set: from the struct
        if there is one, otherwise from the keys of the data itself.  Either
        list is empty if the attributes needed to build it are missing.
    """
    try:
        props = list(self._properties.keys())
    except AttributeError:
        props = []

    data_attrs = []
    try:
        if self._expose_data:
            if self._struct:
                data_attrs = construct_data_keys(self._struct)
            else:
                data_attrs = list(self.data.keys())
    except AttributeError:
        pass

    return props, data_attrs

600 

def _add_struct(self, struct):
    """
    Merge ``struct`` into the object's existing struct, or adopt it as the
    struct if there is none yet or the merge fails.

    :param struct: the struct to add
    """
    try:
        object.__getattribute__(self, "_struct")
        self._struct = construct_merge(self._struct, struct)
    except Exception:
        # Deliberately broad: covers both a missing _struct and a failed
        # merge.  Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        self._struct = struct

608 

def _get_path(self, path, default):
    """
    Walk a dot-separated path through the internal data.

    :param path: dot-separated path, e.g. "bibjson.title"
    :param default: value to return if the final path element is absent
    :return: the value at the path, or ``default``.  Missing intermediate
        elements are treated as empty dicts.
    """
    *parents, leaf = path.split(".")

    node = self.data
    for key in parents:
        node = node.get(key, {})
    return node.get(leaf, default)

618 

def _set_path(self, path, val):
    """
    Set a value at a dot-separated path in the internal data, creating any
    missing intermediate dicts along the way.

    :param path: dot-separated path, e.g. "bibjson.title"
    :param val: the value to store at the final path element
    """
    *parents, leaf = path.split(".")

    node = self.data
    for key in parents:
        if key not in node:
            node[key] = {}
        node = node[key]

    node[leaf] = val

633 

def _delete_from_list(self, path, val=None, matchsub=None, prune=True, apply_struct_on_matchsub=True):
    """
    Remove entries from the list at ``path``.

    Note that matchsub will be coerced with the struct if it exists, to ensure
    that the match is done correctly

    :param path: dot-separated path to the list
    :param val: if not None, remove every entry equal to this value
    :param matchsub: used only if ``val`` is None: a dict; every entry whose
        values equal all of its key/value pairs is removed
    :param prune: if True and the list ends up empty, delete it from the data
    :param apply_struct_on_matchsub: whether to attempt to coerce ``matchsub``
        with the struct before matching
    :return:
    """
    l = self._get_list(path)

    removes = []
    for i, entry in enumerate(l):
        if val is not None:
            if entry == val:
                removes.append(i)
        elif matchsub is not None:
            # attempt to coerce the sub
            if apply_struct_on_matchsub:
                try:
                    object.__getattribute__(self, "_struct")
                    _, struct, _ = construct_lookup(path, self._struct)
                    if struct is not None:
                        matchsub = construct(matchsub, struct, self._coerce_map)
                except Exception:
                    # best effort: fall back to matching on the uncoerced
                    # value.  Catch Exception rather than using a bare except,
                    # so KeyboardInterrupt/SystemExit are not swallowed.
                    pass

            if all(entry.get(k) == v for k, v in matchsub.items()):
                removes.append(i)

    # delete from the end so earlier indices stay valid
    removes.sort(reverse=True)
    for r in removes:
        del l[r]

    if len(l) == 0 and prune:
        self._delete(path, prune)

678 

def _delete(self, path, prune=True):
    """
    Delete the value at a dot-separated path from the internal data.

    Nothing happens if any element of the path is absent.  (Previously a
    missing intermediate element was skipped and the walk continued from the
    same dict, so deleting "x.b" could remove an unrelated root key "b".)

    :param path: dot-separated path to the value to delete
    :param prune: if True, hand the ancestors of the dict the value was
        deleted from to ``_prune_stack``, which removes any empty dicts they
        contain
    """
    *parents, leaf = path.split(".")

    context = self.data
    stack = []
    for key in parents:
        if key not in context:
            return
        context = context[key]
        stack.append(context)

    if leaf not in context:
        return
    del context[leaf]

    if prune and stack:
        # Drop the dict we just deleted from: _prune_stack inspects the
        # children of each remaining ancestor, so its own parent is what
        # removes it if it is now empty.
        stack.pop()
        self._prune_stack(stack)

695 

def _prune_stack(self, stack):
    """
    Remove empty dicts from each dict on the stack, working from the top of
    the stack downwards.  The stack is consumed in the process.

    :param stack: list of dicts to examine
    """
    while stack:
        context = stack.pop()
        # collect first, delete after: the dict can't change size mid-iteration
        empties = [key for key, child in context.items() if isinstance(child, dict) and len(child) == 0]
        for key in empties:
            del context[key]

705 

def _coerce(self, val, cast, accept_failure=False):
    """
    Apply a coerce function to a value.

    :param val: the value to coerce
    :param cast: the coerce function, or None to return ``val`` unchanged
    :param accept_failure: if True, return ``val`` unchanged when the cast
        raises ValueError or TypeError
    :return: the coerced value
    :raises DataSchemaException: if the cast raises ValueError or TypeError
        and ``accept_failure`` is False
    """
    if cast is None:
        return val

    try:
        return cast(val)
    except (ValueError, TypeError):
        if not accept_failure:
            raise DataSchemaException("Cast with {x} failed on '{y}' of type {z}".format(x=cast, y=val, z=type(val)))
        return val

715 

def _get_single(self, path, coerce=None, default=None, allow_coerce_failure=True):
    """
    Get the value at a path, optionally coercing it.

    :param path: dot-separated path into the data
    :param coerce: coerce function to apply to a non-None value
    :param default: value to use if nothing is found at the path
    :param allow_coerce_failure: if True, a failed coerce returns the raw value
    :return: the (possibly coerced) value
    """
    # get the value at the point in the object
    found = self._get_path(path, default)

    # nothing to coerce with, or nothing to coerce: return the value as-is
    if coerce is None or found is None:
        return found

    return self._coerce(found, coerce, accept_failure=allow_coerce_failure)

726 

def _get_list(self, path, coerce=None, by_reference=True, allow_coerce_failure=True):
    """
    Get the list at a path.

    :param path: dot-separated path into the data
    :param coerce: coerce function to apply to each entry
    :param by_reference: if True, return the list held in the data (creating
        and storing an empty one if absent, and storing the coerced list if
        coercing); if False, return an independent list and leave the data
        untouched
    :param allow_coerce_failure: if True, entries that fail to coerce are kept as-is
    :return: the list
    :raises DataSchemaException: if the value at the path is not a list
    """
    # get the value at the point in the object
    current = self._get_path(path, None)

    if current is None:
        # otherwise, default is an empty list
        if not by_reference:
            return []
        # if there is no value and we want to do by reference, then create it, bind it and return it
        fresh = []
        self._set_single(path, fresh)
        return fresh

    # check that the val is actually a list
    if not isinstance(current, list):
        raise DataSchemaException("Expecting a list at {x} but found {y}".format(x=path, y=current))

    if coerce is None:
        return current if by_reference else deepcopy(current)

    # if there is a value, coerce each of them
    coerced = [self._coerce(item, coerce, accept_failure=allow_coerce_failure) for item in current]
    if by_reference:
        self._set_single(path, coerced)
    return coerced

756 

def _set_single(self, path, val, coerce=None, allow_coerce_failure=False, allowed_values=None, allowed_range=None,
                allow_none=True, ignore_none=False):
    """
    Set a single value at a path, after coercion and validation.

    :param path: dot-separated path into the data
    :param val: the value to set
    :param coerce: coerce function to apply to a non-None value
    :param allow_coerce_failure: if True, a failed coerce keeps the raw value
    :param allowed_values: if not None, the (coerced) value must be in this collection
    :param allowed_range: if not None, a (lower, upper) pair; either bound may be None
    :param allow_none: whether None may be stored
    :param ignore_none: if True, a None value is silently not stored
    :raises DataSchemaException: if None is not allowed, or the value is not
        permitted or is out of range
    """
    if val is None:
        if ignore_none:
            return
        if not allow_none:
            raise DataSchemaException("NoneType is not allowed at {x}".format(x=path))
    elif coerce is not None:
        # first see if we need to coerce the value (and don't coerce None)
        val = self._coerce(val, coerce, accept_failure=allow_coerce_failure)

    if allowed_values is not None and val not in allowed_values:
        raise DataSchemaException("Value {x} is not permitted at {y}".format(x=val, y=path))

    if allowed_range is not None:
        lower, upper = allowed_range
        too_low = lower is not None and val < lower
        if too_low or (upper is not None and val > upper):
            raise DataSchemaException("Value {x} is outside the allowed range: {l} - {u}".format(x=val, l=lower, u=upper))

    # now set it at the path point in the object
    self._set_path(path, val)

780 

def _set_list(self, path, val, coerce=None, allow_coerce_failure=False, allow_none=True, ignore_none=False):
    """
    Set a list at a path, after coercion and validation of its entries.

    :param path: dot-separated path into the data
    :param val: the list to set; a non-list value is wrapped in a list
    :param coerce: coerce function applied to each retained entry
    :param allow_coerce_failure: if True, entries that fail to coerce are kept as-is
    :param allow_none: whether None entries (and an empty list) may be stored
    :param ignore_none: if True, None entries are dropped, and nothing is
        stored if the list ends up empty
    :raises DataSchemaException: if a None entry or an empty list is supplied
        when neither allowed nor ignored
    """
    # first ensure that the value is a list
    items = val if isinstance(val, list) else [val]

    # now carry out the None check: a None is only an error if Nones are
    # neither allowed nor ignored
    if not allow_none and not ignore_none:
        for item in items:
            if item is None:
                raise DataSchemaException("NoneType is not allowed at {x}".format(x=path))

    # now coerce each of the values, stripping out Nones if necessary
    kept = [
        self._coerce(item, coerce, accept_failure=allow_coerce_failure)
        for item in items
        if item is not None or not ignore_none
    ]

    # check that the cleaned array isn't empty, and if it is behave appropriately
    if len(kept) == 0:
        # this is equivalent to a None, so we need to decide what to do
        if ignore_none:
            # if we are ignoring nones, just do nothing
            return
        if not allow_none:
            # if we are not ignoring nones, and not allowing them, raise an error
            raise DataSchemaException("Empty array not permitted at {x}".format(x=path))

    # now set it on the path
    self._set_path(path, kept)

809 

def _add_to_list(self, path, val, coerce=None, allow_coerce_failure=False, allow_none=False, allowed_values=None, ignore_none=True, unique=False):
    """
    Append a value to the list at a path.

    :param path: dot-separated path to the list
    :param val: the value to append
    :param coerce: coerce function to apply to the value
    :param allow_coerce_failure: if True, a failed coerce keeps the raw value
    :param allow_none: whether None may be appended
    :param allowed_values: if not None, the (uncoerced) value must be in this collection
    :param ignore_none: if True, a None value is silently not appended
    :param unique: if True, the value is not appended if already in the list
    :raises DataSchemaException: if None is not allowed, or the value is not permitted
    """
    if val is None:
        if ignore_none:
            return
        if not allow_none:
            raise DataSchemaException("NoneType is not allowed in list at {x}".format(x=path))

    if allowed_values is not None and val not in allowed_values:
        raise DataSchemaException("Value {x} is not permitted at {y}".format(x=val, y=path))

    # first coerce the value
    if coerce is not None:
        val = self._coerce(val, coerce, accept_failure=allow_coerce_failure)

    target = self._get_list(path, by_reference=True)

    # if we require the list to be unique, skip values that are already present
    if unique and val in target:
        return

    # otherwise, append
    target.append(val)

832 

def _set_with_struct(self, path, val):
    """
    Set a value at a path, using the struct entry for that path to decide
    how it is coerced and stored.

    Fields are coerced with the function named in the struct (defaulting to
    "unicode").  List entries and objects are run through ``construct`` when
    the struct supplies a sub-struct for them.  Nothing is set if the struct
    lookup yields none of "field", "list" or "object".

    :param path: dot-separated path into the data
    :param val: the value to set
    """
    kind, struct, instructions = construct_lookup(path, self._struct)

    if kind == "field":
        kwargs = construct_kwargs(kind, "set", instructions)
        coerce_fn = self._coerce_map.get(instructions.get("coerce", "unicode"))
        self._set_single(path, val, coerce=coerce_fn, **kwargs)
        return

    if kind == "list":
        entries = val if isinstance(val, list) else [val]
        if struct is not None:
            entries = [construct(x, struct, self._coerce_map) for x in entries]
        kwargs = construct_kwargs(kind, "set", instructions)
        coerce_fn = self._coerce_map.get(instructions.get("coerce"))
        self._set_list(path, entries, coerce=coerce_fn, **kwargs)
        return

    if kind == "object":
        if struct is not None:
            val = construct(val, struct, self._coerce_map)
        self._set_single(path, val)

851 

def _add_to_list_with_struct(self, path, val):
    """
    Append a value to the list at a path, using the struct entry for that
    path to construct and validate it.

    :param path: dot-separated path to the list
    :param val: the value to append; run through ``construct`` if the struct
        supplies a sub-struct for the list entries
    :raises DataStructureException: if the struct does not describe the path as a list
    """
    kind, struct, instructions = construct_lookup(path, self._struct)
    if kind != "list":
        raise DataStructureException("Attempt to add to list {x} failed - it is not a list element".format(x=path))

    entry = val if struct is None else construct(val, struct, self._coerce_map)
    kwargs = construct_kwargs(kind, "set", instructions)
    self._add_to_list(path, entry, **kwargs)

860 

861 

def _utf8_unicode(self):
    """
    DEPRECATED - use dataobj.to_unicode() instead

    :return: the coerce function built by ``to_unicode()``
    """
    coerce_fn = to_unicode()
    return coerce_fn

867 

def _int(self):
    """
    DEPRECATED - use dataobj.to_int() instead

    :return: the coerce function built by ``to_int()``
    """
    coerce_fn = to_int()
    return coerce_fn

873 

def _float(self):
    """
    DEPRECATED - use dataobj.to_float() instead

    :return: the coerce function built by ``to_float()``
    """
    coerce_fn = to_float()
    return coerce_fn

879 

def _date_str(self, in_format=None, out_format=None):
    """
    DEPRECATED - use dataobj.date_str instead

    :param in_format: passed through to ``date_str``
    :param out_format: passed through to ``date_str``
    :return: the coerce function built by ``date_str``
    """
    coerce_fn = date_str(in_format=in_format, out_format=out_format)
    return coerce_fn

885 

886 

887############################################################ 

888## Primitive object schema validation 

889 

class ObjectSchemaValidationError(DataObjException):
    """Raised by the deprecated ``validate`` function when an object does not match its schema."""

892 

893 

def validate(obj, schema):
    """
    DEPRECATED - use 'construct' instead.

    Check every key of ``obj`` against the schema, recursing into list
    entries and objects for which the schema supplies a sub-schema.

    :param obj: the dict to check
    :param schema: dict with optional keys "bools", "fields", "lists" and
        "objects" (lists of permitted key names), plus "list_entries" and
        "object_entries" (key name to sub-schema)
    :return:
    :raises ObjectSchemaValidationError: if a key is not permitted or a value
        has the wrong type
    """
    warnings.warn("DEPRECATED - use 'construct' instead.", DeprecationWarning)

    bools = schema.get("bools", [])
    fields = schema.get("fields", [])
    lists = schema.get("lists", [])
    objects = schema.get("objects", [])

    # all fields
    allowed = bools + fields + lists + objects

    # exact type matches are intentional: a bool must not pass as a number
    scalar_types = (str, int, float)

    for key, value in obj.items():
        # is the key allowed at all
        if key not in allowed:
            raise ObjectSchemaValidationError("object contains key " + key + " which is not permitted by schema")

        # check the bools are bools
        if key in bools and type(value) != bool:
            raise ObjectSchemaValidationError("object contains " + key + " = " + str(value) + " but expected boolean")

        # check that the fields are plain old strings or numbers
        if key in fields and type(value) not in scalar_types:
            raise ObjectSchemaValidationError("object contains " + key + " = " + str(value) + " but expected string, unicode or a number")

        # check that the lists are really lists
        if key in lists:
            if type(value) != list:
                raise ObjectSchemaValidationError("object contains " + key + " = " + str(value) + " but expected list")

            # if it is a list, then for each member validate
            entry_schema = schema.get("list_entries", {}).get(key)
            for entry in value:
                if entry_schema is not None:
                    # validate each entry against the schema
                    validate(entry, entry_schema)
                elif type(entry) not in scalar_types:
                    # validate the entries as fields
                    raise ObjectSchemaValidationError("list in object contains " + str(type(entry)) + " but expected string, unicode or a number in " + key)

        # check that the objects are objects
        if key in objects:
            if type(value) != dict:
                raise ObjectSchemaValidationError("object contains " + key + " = " + str(value) + " but expected object/dict")

            # if it is an object with a schema, then validate; otherwise we
            # are not imposing a schema on this object
            object_schema = schema.get("object_entries", {}).get(key)
            if object_schema is not None:
                validate(value, object_schema)

949 

950 

951############################################################ 

952## Data structure coercion 

953 

class DataStructureException(DataObjException):
    """
    Raised by construct() when the data supplied does not fit the struct it is being built against
    """

956 

class ConstructException(DataObjException):
    """
    Raised by construct_validate() when a struct definition is itself malformed
    """

959 

class ScriptTagFoundException(DataObjException):
    """
    Signals that a script tag was found in metadata
    """

965 

966 

def construct_validate(struct, context=""):
    """
    Check that a struct definition is well formed.  A struct has the shape:

    {
        "fields" : {
            "field_name" : {"coerce" : "coerce_function", **kwargs}
        },
        "objects" : [
            "field_name"
        ],
        "lists" : {
            "field_name" : {"contains" : "object|field", "coerce" : "field_coerce_function", **kwargs}
        },
        "required" : ["field_name"],
        "structs" : {
            "field_name" : {
                <construct>
            }
        }
    }

    :param struct: the struct definition to check
    :param context: dotted path of this struct within its parents, used in error messages ("" means the root)
    :return: True if the struct, and every struct nested inside it, is acceptable
    :raises ConstructException: at the first problem found
    """
    # every message says where the problem is; the top level is reported as "root"
    where = "root" if context == "" else context
    permitted = ["fields", "objects", "lists", "required", "structs"]

    # only the known sections may appear
    for section in struct.keys():
        if section not in permitted:
            raise ConstructException("Key '{x}' present in struct at '{y}', but is not permitted".format(x=section, y=where))

    # fields: each must name a coerce function, and every argument must be a string or a list
    for field_name, instructions in struct.get("fields", {}).items():
        if "coerce" not in instructions:
            raise ConstructException("Coerce function not listed in field '{x}' at '{y}'".format(x=field_name, y=where))
        for arg, value in instructions.items():
            if not isinstance(value, (list, str)):
                raise ConstructException("Argument '{a}' in field '{b}' at '{c}' is not a string or list".format(a=arg, b=field_name, c=where))

    # objects: just a list of names
    for entry in struct.get("objects", []):
        if not isinstance(entry, str):
            raise ConstructException("There is a non-string value in the object list at '{y}'".format(y=where))

    # lists: must say what they contain, and every argument must be a string or a list
    for field_name, instructions in struct.get("lists", {}).items():
        contains = instructions.get("contains")
        if contains is None:
            raise ConstructException("No 'contains' argument in list definition for field '{x}' at '{y}'".format(x=field_name, y=where))
        if contains not in ("object", "field"):
            raise ConstructException("'contains' argument in list '{x}' at '{y}' contains illegal value '{z}'".format(x=field_name, y=where, z=contains))
        for arg, value in instructions.items():
            if not isinstance(value, (list, str)):
                raise ConstructException("Argument '{a}' in list '{b}' at '{c}' is not a string or list".format(a=arg, b=field_name, c=where))

    # required: again just a list of names
    for entry in struct.get("required", []):
        if not isinstance(entry, str):
            raise ConstructException("There is a non-string value in the required list at '{y}'".format(y=where))

    # structs: each must belong to a declared object or list ...
    substructs = struct.get("structs", {})
    possibles = struct.get("objects", []) + list(struct.get("lists", {}).keys())
    for name in substructs:
        if name not in possibles:
            raise ConstructException("struct contains key '{a}' which is not listed in object or list definitions at '{x}'".format(a=name, x=where))

    # ... and must itself be a valid struct, which we check by recursing with an extended path
    for name, substruct in substructs.items():
        child_context = name if context == "" else context + "." + name
        construct_validate(substruct, context=child_context)

    return True

1051 

1052 

def construct(obj, struct, coerce, context="", silent_prune=False, maintain_reference=False):
    """
    Build a clean copy of obj by walking the struct definition, coercing every field and
    list value and recursing into nested objects.  The struct has the shape:

    {
        "fields" : {
            "field_name" : {"coerce" : "coerce_function", **kwargs}
        },
        "objects" : [
            "field_name"
        ],
        "lists" : {
            "field_name" : {"contains" : "object|field", "coerce" : "field_coerce_function", **kwargs}
        },
        "required" : ["field_name"],
        "structs" : {
            "field_name" : {
                <construct>
            }
        }
    }

    :param obj: the object to construct from; None is passed straight back
    :param struct: the struct definition describing the permitted shape of obj
    :param coerce: dict mapping the coerce function names used in the struct to the functions themselves
    :param context: path of obj within the overall structure, used in error messages ("" means the root)
    :param silent_prune: if False, any key in obj which the struct does not define is an error; if True that
        check is skipped and, because only defined keys are copied, such keys are simply left out
    :param maintain_reference: if True, obj itself is emptied, refilled with the constructed data and returned;
        otherwise the newly constructed data is returned.  Nested objects are always rebuilt.
    :return: the constructed data, or None if obj was None
    :raises DataStructureException: if obj does not fit the struct, or a value cannot be set
    """
    if obj is None:
        return None

    # messages about this level as a whole name the top level "root"
    where = context if context != "" else "root"

    # anything which cannot list its keys is not an object.  Only the errors that a non-object
    # produces are caught, so that interrupts and exits are not swallowed
    try:
        keys = list(obj.keys())
    except (AttributeError, TypeError):
        raise DataStructureException("Expected an object at {c} but found something else instead".format(c=where))

    # check that all the required fields are there
    for r in struct.get("required", []):
        if r not in keys:
            raise DataStructureException("Field '{r}' is required but not present at '{c}'".format(r=r, c=where))

    # check that there are no fields that are not allowed
    # Note that since the construct mechanism copies fields explicitly, silent_prune literally just turns off this
    # check
    if not silent_prune:
        allowed = list(struct.get("fields", {}).keys()) + struct.get("objects", []) + list(struct.get("lists", {}).keys())
        for k in keys:
            if k not in allowed:
                raise DataStructureException("Field '{k}' is not permitted at '{c}'".format(k=k, c=where))

    # this is the new object we'll be creating from the old
    constructed = DataObj()

    # the definitions for nested objects and lists of objects
    substructs = struct.get("structs", {})

    # now check all the fields
    for field_name, instructions in struct.get("fields", {}).items():
        val = obj.get(field_name)
        if val is None:
            continue

        coerce_name = instructions.get("coerce", "unicode")
        coerce_fn = coerce.get(coerce_name)
        if coerce_fn is None:
            raise DataStructureException("No coersion function defined for type '{x}' at '{c}'".format(x=coerce_name, c=context + field_name))

        kwargs = construct_kwargs("field", "set", instructions)

        try:
            constructed._set_single(field_name, val, coerce=coerce_fn, **kwargs)
        except DataSchemaException as e:
            raise DataStructureException("Schema exception at '{a}', {b}".format(a=context + field_name, b=str(e))) from e

    # next check all the objects (which will involve a recursive call to this function)
    for field_name in struct.get("objects", []):
        val = obj.get(field_name)
        if val is None:
            continue
        if type(val) != dict:
            raise DataStructureException("Found '{x}' = '{y}' but expected object/dict".format(x=context + field_name, y=val))

        instructions = substructs.get(field_name)
        if instructions is None:
            # this is the lowest point at which we have instructions, so just accept the data structure as-is
            # (taking a deep copy to destroy any references)
            beneath = deepcopy(val)
        else:
            # we need to recurse further down to get the correct sub-data structure
            beneath = construct(val, instructions, coerce=coerce, context=context + field_name + ".", silent_prune=silent_prune)

        try:
            constructed._set_single(field_name, beneath)
        except DataSchemaException as e:
            raise DataStructureException(str(e)) from e

    # now check all the lists
    for field_name, instructions in struct.get("lists", {}).items():
        vals = obj.get(field_name)
        if vals is None:
            continue
        if not isinstance(vals, list):
            raise DataStructureException("Expecting list at {x} but found something else".format(x=context + field_name))

        # prep the keyword arguments for the setters
        kwargs = construct_kwargs("list", "set", instructions)

        contains = instructions.get("contains")
        if contains == "field":
            # coerce all the values in the list
            coerce_name = instructions.get("coerce", "unicode")
            coerce_fn = coerce.get(coerce_name)
            if coerce_fn is None:
                raise DataStructureException("No coersion function defined for type '{x}' at '{c}'".format(x=coerce_name, c=context + field_name))

            for val in vals:
                try:
                    constructed._add_to_list(field_name, val, coerce=coerce_fn, **kwargs)
                except DataSchemaException as e:
                    raise DataStructureException(str(e)) from e

        elif contains == "object":
            # every entry in the list shares the one definition (if there is one)
            subinst = substructs.get(field_name)

            # for each object in the list, send it for construction
            for i, val in enumerate(vals):
                if type(val) != dict:
                    raise DataStructureException("Found '{x}[{p}]' = '{y}' but expected object/dict".format(x=context + field_name, y=val, p=i))

                if subinst is None:
                    # no definition, so accept the entry as-is (deep copied to destroy any references)
                    beneath = deepcopy(val)
                else:
                    # we need to recurse further down to get the correct sub-data structure
                    beneath = construct(val, subinst, coerce=coerce, context=context + field_name + "[" + str(i) + "].", silent_prune=silent_prune)

                try:
                    constructed._add_to_list(field_name, beneath)
                except DataSchemaException as e:
                    raise DataStructureException(str(e)) from e

        else:
            raise DataStructureException("Cannot understand structure where list '{x}' elements contain '{y}'".format(x=context + field_name, y=contains))

    if maintain_reference:
        # replace the contents of the caller's object rather than handing back a new one
        obj.clear()
        obj.update(constructed.data)
        return obj

    return constructed.data

1209 

1210 

def construct_merge(target, source):
    """
    Produce a new struct definition combining target and source, leaving both untouched.

    Anything target already defines is kept; source only contributes fields, objects, lists
    and required entries which target lacks.  Structs which both define are merged recursively
    by the same rule.

    :param target: the struct definition which takes precedence
    :param source: the struct definition supplying anything target is missing
    :return: the merged struct definition
    """
    merged = deepcopy(target)

    # each section is only created in the result once the source actually has something for it

    for name, instructions in source.get("fields", {}).items():
        known = merged.setdefault("fields", {})
        if name not in known:
            known[name] = deepcopy(instructions)

    for name in source.get("objects", []):
        known = merged.setdefault("objects", [])
        if name not in known:
            known.append(name)

    for name, instructions in source.get("lists", {}).items():
        known = merged.setdefault("lists", {})
        if name not in known:
            known[name] = deepcopy(instructions)

    for name in source.get("required", []):
        known = merged.setdefault("required", [])
        if name not in known:
            known.append(name)

    for name, substruct in source.get("structs", {}).items():
        known = merged.setdefault("structs", {})
        if name in known:
            # both sides define this struct, so merge them by the same rules
            known[name] = construct_merge(known[name], substruct)
        else:
            known[name] = deepcopy(substruct)

    return merged

1248 

def construct_lookup(path, struct):
    """
    Find what a struct definition says about the element at a dotted path.

    Every segment of the path except the last must name an entry in "objects" which also
    has a definition under "structs"; the last segment is looked for among the fields,
    then the lists, then the objects.

    :param path: dot-separated path to the element, e.g. "bibjson.title"
    :param struct: the struct definition to search
    :return: a tuple of (type, substruct, instructions), which is one of
        ("field", None, instructions),
        ("list", substruct or None, instructions),
        ("object", substruct or None, None),
        or (None, None, None) if the struct does not define the path
    """
    head, separator, tail = path.partition(".")

    # if there's more than one path element, we will need to recurse
    if separator:
        # it has to be an object, in order for the path to still have multiple segments
        if head not in struct.get("objects", []):
            return None, None, None

        substruct = struct.get("structs", {}).get(head)
        if substruct is None:
            # the object is declared but nothing is defined beneath it, so the rest of
            # the path cannot be resolved (recursing with None would fail)
            return None, None, None

        return construct_lookup(tail, substruct)

    # first check the fields
    instructions = struct.get("fields", {}).get(head)
    if instructions is not None:
        return "field", None, instructions

    # then check the lists
    instructions = struct.get("lists", {}).get(head)
    if instructions is not None:
        return "list", struct.get("structs", {}).get(head), instructions

    # then check the objects
    if head in struct.get("objects", []):
        return "object", struct.get("structs", {}).get(head), None

    return None, None, None

1278 

def construct_kwargs(type, dir, instructions):
    """
    Work out the keyword arguments which a set of struct instructions implies.

    The instructions which are not arguments ("coerce" for fields; "coerce" and "contains"
    for lists) are dropped.  Of the rest, keys prefixed "get__" are arguments for getting and
    everything else is an argument for setting; the "get__" and "set__" prefixes are removed
    from the names returned.

    :param type: "field" or "list"; any other value means no instructions are dropped
    :param dir: "set" or "get"; any other value yields no arguments
    :param instructions: the instructions dict from the struct, or None
    :return: dict of keyword arguments (values are copies of those in the instructions)
    """
    # if there are no instructions there are no kwargs
    if instructions is None:
        return {}

    # work on a copy, so the struct definition itself is never altered
    remaining = deepcopy(instructions)

    # drop the instructions which are known not to be keyword arguments
    if type == "list":
        reserved = ("coerce", "contains")
    elif type == "field":
        reserved = ("coerce",)
    else:
        reserved = ()
    for name in reserved:
        remaining.pop(name, None)

    set_prefix = "set__"
    get_prefix = "get__"

    if dir == "set":
        # basically everything is a "set" argument unless explicitly stated to be a "get" argument
        selected = {}
        for name, value in remaining.items():
            if name.startswith(get_prefix):
                continue
            if name.startswith(set_prefix):
                name = name[len(set_prefix):]
            selected[name] = value
        return selected

    if dir == "get":
        # only the explicitly marked "get" arguments qualify
        return {
            name[len(get_prefix):]: value
            for name, value in remaining.items()
            if name.startswith(get_prefix)
        }

    return {}

1313 

def construct_data_keys(struct):
    """
    List every data key which a struct defines at its top level: the field names first,
    then the object names, then the list names.

    :param struct: the struct definition
    :return: list of key names
    """
    keys = []
    for section, default in (("fields", {}), ("objects", []), ("lists", {})):
        keys.extend(struct.get(section, default))
    return keys

1316 

def merge_outside_construct(struct, target, source):
    """
    Produce a copy of target, topped up with whatever source holds which the struct does
    not define.

    Keys the struct defines as fields or lists are never taken from source.  Keys it defines
    as objects are merged recursively using that object's own struct.  Any other key in
    source is copied across, unless target already has a value for it.

    :param struct: the struct definition deciding what counts as "outside"
    :param target: the data whose values take precedence; it is not modified
    :param source: the data supplying the additional values
    :return: the merged data
    """
    merged = deepcopy(target)

    for key, value in source.items():
        if key in struct.get("fields", {}) or key in struct.get("lists", {}):
            # defined by the struct as a field or a list, so the source has no say here
            continue
        elif key in struct.get("objects", []):
            # defined as an object, so apply the same rules one level down
            substruct = struct.get("structs", {}).get(key, {})
            merged[key] = merge_outside_construct(substruct, target.get(key, {}), value)
        elif key not in merged:
            # not represented at this level in the struct, so copy it over in full
            # (unless the target already has a value here)
            merged[key] = deepcopy(value)

    return merged

1343 

1344############################################################ 

1345## Unit test support 

1346 

def test_dataobj(obj, fields_and_values):
    """
    Exercise the getters and setters of a data object, failing an assertion if any of
    them does not behave as specified.

    Supply the object and a dict of attribute names, each with the value to set and the
    value which is expected back:

    {
        "key" : ("set value", "get value")
    }

    If the value expected back is the same as the value set, it can be given on its own:

    {
        "key" : "set value"
    }

    Every attribute is set before any of them is read back.

    :param obj: the data object to exercise
    :param fields_and_values: dict of attribute name to value, or to (set value, get value) tuple
    :return: None
    """
    def _normalise(spec):
        # a bare value stands for a 1-tuple; a missing get value defaults to the set value
        pair = spec if isinstance(spec, tuple) else (spec,)
        return pair[0], (pair[1] if len(pair) > 1 else pair[0])

    # first pass: set everything
    for attr, spec in fields_and_values.items():
        to_set, _ = _normalise(spec)
        try:
            setattr(obj, attr, to_set)
        except AttributeError:
            assert False, "Unable to set attribute {x} with value {y}".format(x=attr, y=to_set)

    # second pass: read everything back and compare
    for attr, spec in fields_and_values.items():
        _, expected = _normalise(spec)
        actual = getattr(obj, attr)
        assert actual == expected, (attr, actual, expected)
1384 assert val == get_val, (k, val, get_val)