Coverage for portality / models / v2 / journal.py: 92%

799 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-04 09:41 +0100

1from __future__ import annotations 

2 

3import string 

4import uuid 

5from copy import deepcopy 

6from datetime import datetime, timedelta 

7from typing import Callable, Iterable 

8 

9from unidecode import unidecode 

10 

11from portality.core import app 

12from portality.dao import DomainObject 

13from portality.lib import es_data_mapping, dates, coerce 

14from portality.lib.coerce import COERCE_MAP 

15from portality.lib.dates import DEFAULT_TIMESTAMP_VAL, find_earliest_date 

16from portality.lib.seamless import SeamlessMixin 

17from portality.models.account import Account 

18from portality.models.v2 import shared_structs 

19from portality.models.v2.bibjson import JournalLikeBibJSON 

20 

21from portality.lib.dates import FMT_DATE_STD 

22 

def _coerced(kind):
    """Return a fresh field spec that coerces a value to ``kind``."""
    return {"coerce": kind}


# Journal-specific part of the seamless struct: the "admin" and "index" objects.
# Journal.__SEAMLESS_STRUCT__ lists it alongside the shared structs.
JOURNAL_STRUCT = {
    "objects": ["admin", "index"],
    "structs": {
        "admin": {
            "fields": {
                "in_doaj": _coerced("bool"),
                "ticked": _coerced("bool"),
                "current_application": _coerced("unicode"),
                "last_full_review": _coerced("bigenddate"),
                "last_withdrawn": _coerced("utcdatetime"),
                "last_reinstated": _coerced("utcdatetime"),
                "last_owner_transfer": _coerced("utcdatetime"),
            },
            "lists": {
                "related_applications": {"contains": "object"},
            },
            "structs": {
                "related_applications": {
                    "fields": {
                        "application_id": _coerced("unicode"),
                        "date_accepted": _coerced("utcdatetime"),
                        "status": _coerced("unicode"),
                    },
                },
                "contact": {
                    "name": _coerced("unicode"),
                    "email": _coerced("unicode"),
                },
            },
        },
        "index": {
            "fields": {
                "publisher_ac": _coerced("unicode"),
                "institution_ac": _coerced("unicode"),
            },
        },
    },
}

64 

65 

66 

class ContinuationException(Exception):
    """Raised by ``Journal.make_continuation`` when the request cannot be honoured."""

69 

70 

71class JournalLikeObject(SeamlessMixin, DomainObject): 

72 

73 # During migration from the old data model to the new data model for journal-like objects, this allows 

74 # the front-end to continue to work, even if the object sees data which is not in the struct. 

75 # This can be commented out after any migration which changes the data model 

76 __SEAMLESS_SILENT_PRUNE__ = app.config.get("SEAMLESS_JOURNAL_LIKE_SILENT_PRUNE", False) 

77 

@classmethod
def find_by_issn(cls, issns, in_doaj=None, max=10):
    """Run ``JournalQuery.find_by_issn`` and return the hits as ``cls`` instances.

    A single ISSN is accepted as well as a list; ``in_doaj`` and ``max`` are
    handed to the query builder unchanged.
    """
    wanted = issns if isinstance(issns, list) else [issns]
    builder = JournalQuery()
    builder.find_by_issn(wanted, in_doaj=in_doaj, max=max)
    response = cls.query(q=builder.query)
    hits = response.get("hits", {}).get("hits", [])
    # instantiate via cls rather than Journal so subclasses get their own type
    return [cls(**hit.get("_source")) for hit in hits]


@classmethod
def find_by_issn_exact(cls, issns, in_doaj=None, max=2):
    """
    Find the journal whose ISSNs match the given ones exactly.

    With clean data this should yield at most one record.  More than two
    ISSNs can never match, so that case returns an empty list without querying.
    """
    wanted = issns if isinstance(issns, list) else [issns]
    if len(wanted) > 2:
        return []
    builder = JournalQuery()
    builder.find_by_issn_exact(wanted, in_doaj=in_doaj, max=max)
    response = cls.query(q=builder.query)
    hits = response.get("hits", {}).get("hits", [])
    # instantiate via cls rather than Journal so subclasses get their own type
    return [cls(**hit.get("_source")) for hit in hits]


@classmethod
def issns_by_owner(cls, owner, in_doaj=None, issn_field=None):
    """Return the keys of the ``issns`` aggregation buckets produced by ``IssnQuery``."""
    owner_query = IssnQuery(owner, in_doaj=in_doaj, issn_field=issn_field)
    response = cls.query(q=owner_query.query())
    buckets = response.get("aggregations", {}).get("issns", {}).get("buckets", [])
    return [bucket.get("key") for bucket in buckets]


@classmethod
def get_by_owner(cls, owner):
    """Run ``OwnerQuery`` for ``owner`` and return the hits as ``cls`` instances."""
    response = cls.query(q=OwnerQuery(owner).query())
    hits = response.get("hits", {}).get("hits", [])
    # get_by_owner() in application.py predates this one; it remains an override
    # there because it does application-specific work
    return [cls(**hit.get("_source")) for hit in hits]


@classmethod
def issns_by_query(cls, query):
    """Concatenate ``known_issns()`` of every record that ``cls.iterate(query)`` yields."""
    collected = []
    for record in cls.iterate(query):
        collected.extend(record.known_issns())
    return collected


@classmethod
def find_by_journal_url(cls, url, in_doaj=None, max=10):
    """Run ``JournalURLQuery`` and return the hits as ``cls`` instances."""
    url_query = JournalURLQuery(url, in_doaj, max)
    response = cls.query(q=url_query.query())
    hits = response.get("hits", {}).get("hits", [])
    # instantiate via cls rather than Journal so subclasses get their own type
    return [cls(**hit.get("_source")) for hit in hits]


@classmethod
def recent(cls, max=10):
    """Run ``RecentJournalsQuery(max)`` and return the hits as ``cls`` instances."""
    response = cls.query(q=RecentJournalsQuery(max).query())
    hits = response.get("hits", {}).get("hits", [])
    # instantiate via cls rather than Journal so subclasses get their own type
    return [cls(**hit.get("_source")) for hit in hits]

142 

143 ############################################ 

144 ## base property methods 

145 

@property
def data(self):
    """The data dictionary held by the seamless wrapper."""
    return self.__seamless__.data


@property
def has_apc(self):
    """Stored value of ``bibjson.apc.has_apc``."""
    return self.__seamless__.get_single("bibjson.apc.has_apc")


@property
def id(self):
    """Stored record ``id``."""
    return self.__seamless__.get_single("id")


def set_id(self, id=None):
    """Store ``id``; when omitted, one is obtained from ``self.makeid()``."""
    chosen = self.makeid() if id is None else id
    self.__seamless__.set_with_struct("id", chosen)


def set_created(self, date=None):
    """Store ``created_date``; defaults to ``dates.now_str()``."""
    stamp = dates.now_str() if date is None else date
    self.__seamless__.set_with_struct("created_date", stamp)


@property
def created_date(self):
    """Stored ``created_date`` value."""
    return self.__seamless__.get_single("created_date")


@property
def created_timestamp(self):
    """``created_date`` passed through ``coerce.to_datestamp()``."""
    as_datestamp = coerce.to_datestamp()
    return self.__seamless__.get_single("created_date", coerce=as_datestamp)


def set_last_updated(self, date=None):
    """Store ``last_updated``; defaults to ``dates.now_str()``."""
    stamp = dates.now_str() if date is None else date
    self.__seamless__.set_with_struct("last_updated", stamp)


@property
def last_updated(self):
    """Stored ``last_updated`` value."""
    return self.__seamless__.get_single("last_updated")


@property
def last_updated_timestamp(self):
    """``last_updated`` passed through ``coerce.to_datestamp()``."""
    as_datestamp = coerce.to_datestamp()
    return self.__seamless__.get_single("last_updated", coerce=as_datestamp)


def last_updated_since(self, days=0):
    """True when ``last_updated_timestamp`` is later than ``days`` days before ``dates.now()``."""
    stamp = self.last_updated_timestamp
    cutoff = dates.now() - timedelta(days=days)
    return stamp > cutoff


def set_last_manual_update(self, date=None):
    """Store ``last_manual_update``; defaults to ``dates.now_str()``."""
    stamp = dates.now_str() if date is None else date
    self.__seamless__.set_with_struct("last_manual_update", stamp)


@property
def last_manual_update(self):
    """Stored ``last_manual_update`` value."""
    return self.__seamless__.get_single("last_manual_update")

200 

def last_manually_updated_since(self, days=0):
    """Report whether the last manual update is more recent than ``days`` days ago.

    Returns False when no manual update timestamp is available, rather than
    failing on a comparison with ``None``.  The cut-off is taken from
    ``dates.now()``, the same clock ``last_updated_since`` uses, instead of
    the deprecated ``datetime.utcnow()``.

    :param days: size of the look-back window in days
    :return: True if ``last_manual_update_timestamp`` is later than the cut-off
    """
    stamp = self.last_manual_update_timestamp
    if stamp is None:
        return False
    return stamp > (dates.now() - timedelta(days=days))

203 

@property
def last_manual_update_timestamp(self):
    """``last_manual_update`` passed through ``coerce.to_datestamp()``."""
    as_datestamp = coerce.to_datestamp()
    return self.__seamless__.get_single("last_manual_update", coerce=as_datestamp)


@property
def most_urgent_flag_deadline_timestamp(self):
    """``most_urgent_flag_deadline`` converted with ``coerce.to_datestamp()``."""
    deadline = self.most_urgent_flag_deadline
    return coerce.to_datestamp()(deadline)

213 

def has_been_manually_updated(self):
    """Report whether a manual update later than the Unix epoch is recorded.

    :return: False when ``last_manual_update_timestamp`` is None or not later
        than 1970-01-01T00:00:00, True otherwise
    """
    stamp = self.last_manual_update_timestamp
    if stamp is None:
        return False
    # datetime(1970, 1, 1) is exactly what datetime.utcfromtimestamp(0) returned;
    # spelling it out avoids that call, which is deprecated since Python 3.12
    return stamp > datetime(1970, 1, 1)

219 

def set_date_applied(self, date=None):
    """Store ``admin.date_applied``; defaults to ``dates.now_str()``."""
    applied = dates.now_str() if date is None else date
    self.__seamless__.set_with_struct("admin.date_applied", applied)


@property
def date_applied(self):
    """Stored ``admin.date_applied`` value."""
    return self.__seamless__.get_single("admin.date_applied")


@property
def date_applied_timestamp(self):
    """``admin.date_applied`` passed through ``coerce.to_datestamp()``."""
    as_datestamp = coerce.to_datestamp()
    return self.__seamless__.get_single("admin.date_applied", coerce=as_datestamp)


@date_applied.setter
def date_applied(self, val):
    """Store ``val`` as ``admin.date_applied``; unlike ``set_date_applied`` there is no default."""
    self.__seamless__.set_with_struct("admin.date_applied", val)


def has_oa_start_date(self):
    """Return the stored ``bibjson.oa_start`` value, or False when there is none."""
    return self.__seamless__.get_single("bibjson.oa_start", default=False)

239 

@property
def owner(self):
    """Stored ``admin.owner`` value."""
    return self.__seamless__.get_single("admin.owner")


def set_owner(self, owner):
    """Store ``owner`` as ``admin.owner``."""
    self.__seamless__.set_with_struct("admin.owner", owner)


def remove_owner(self):
    """Delete ``admin.owner``."""
    self.__seamless__.delete("admin.owner")


@property
def owner_account(self):
    """The ``Account`` pulled for the owner, or None when no owner is set."""
    if not self.owner:
        return None
    return Account.pull(self.owner)


@property
def editor_group(self):
    """Stored ``admin.editor_group`` value."""
    return self.__seamless__.get_single("admin.editor_group")


def set_editor_group(self, eg):
    """Store ``eg`` as ``admin.editor_group``."""
    self.__seamless__.set_with_struct("admin.editor_group", eg)


def remove_editor_group(self):
    """Delete ``admin.editor_group``."""
    self.__seamless__.delete("admin.editor_group")


@property
def editor(self):
    """Stored ``admin.editor`` value."""
    return self.__seamless__.get_single("admin.editor")


def set_editor(self, ed):
    """Store ``ed`` as ``admin.editor``."""
    self.__seamless__.set_with_struct("admin.editor", ed)


def remove_editor(self):
    """Delete ``admin.editor``."""
    self.__seamless__.delete('admin.editor')


@property
def contact(self):
    """Stored ``admin.contact`` value."""
    return self.__seamless__.get_single("admin.contact")


@property
def contact_name(self):
    """Stored ``admin.contact.name`` value."""
    return self.__seamless__.get_single("admin.contact.name")


@contact_name.setter
def contact_name(self, name):
    """Store ``name`` as ``admin.contact.name``."""
    self.__seamless__.set_with_struct("admin.contact.name", name)


@property
def contact_email(self):
    """Stored ``admin.contact.email`` value."""
    return self.__seamless__.get_single("admin.contact.email")


@contact_email.setter
def contact_email(self, email):
    """Store ``email`` as ``admin.contact.email``."""
    self.__seamless__.set_with_struct("admin.contact.email", email)


def set_contact(self, name, email):
    """Set the contact name, then the contact email, via their property setters."""
    self.contact_name = name
    self.contact_email = email


def remove_contact(self):
    """Delete the whole ``admin.contact`` entry."""
    self.__seamless__.delete("admin.contact")

302 

303 #### Notes methods 

304 

def add_note(self, note, date=None, id=None, author_id=None, assigned_to=None, deadline=None):
    """Add a note to ``admin.notes``, replacing any stored note it matches.

    :param note: the note text
    :param date: note date; a falsy value is replaced by ``dates.now_str()``
    :param id: note id; ``""`` is treated like None, and a missing id is
        filled with ``uuid.uuid4()`` just before the note is stored
    :param author_id: optional author identifier
    :param assigned_to: optional flag assignee, stored under ``flag``
    :param deadline: optional flag deadline, stored under ``flag``
    """
    note_id = None if id == "" else id

    entry = {"date": date if date else dates.now_str(), "note": note}
    if note_id is not None:
        entry["id"] = note_id
    if author_id is not None:
        entry["author_id"] = author_id

    # the flag sub-object only exists when at least one of its parts was supplied
    flag = {}
    if assigned_to is not None:
        flag["assigned_to"] = assigned_to
    if deadline is not None:
        flag["deadline"] = deadline
    if flag:
        entry["flag"] = flag

    # matching happens before a generated id is attached, so an id-less note
    # is compared on its remaining fields only
    self.__seamless__.delete_from_list("admin.notes", matchsub=entry)
    if note_id is None:
        entry["id"] = uuid.uuid4()
    self.__seamless__.add_to_list_with_struct("admin.notes", entry)

326 

def add_note_by_dict(self, note):
    """Call ``add_note`` with the ``note``, ``date``, ``id`` and ``author_id`` entries of ``note``.

    Missing entries are passed as None; any other keys of the dict (for
    example ``flag``) are not forwarded.
    """
    forwarded = ("note", "date", "id", "author_id")
    return self.add_note(**{field: note.get(field) for field in forwarded})

330 

def remove_note(self, note):
    """Delete from ``admin.notes`` the entries matching the sub-document ``note``."""
    self.__seamless__.delete_from_list("admin.notes", matchsub=note)


def remove_note_by_id(self, note_id):
    """
    Remove a note by its ID.

    :param note_id: The ID of the note to remove.
    """
    match_on_id = {"id": note_id}
    self.__seamless__.delete_from_list("admin.notes", matchsub=match_on_id)

340 

def get_note_by_id(self, note_id):
    """Return the first note whose ``id`` equals ``note_id``, or None if there is none."""
    matching = (entry for entry in self.notes if entry.get("id") == note_id)
    return next(matching, None)

346 

def set_notes(self, notes):
    """Replace ``admin.notes`` with ``notes``."""
    self.__seamless__.set_with_struct("admin.notes", notes)


def remove_notes(self):
    """Delete ``admin.notes`` entirely."""
    self.__seamless__.delete("admin.notes")


@property
def notes(self):
    """The list stored at ``admin.notes``."""
    return self.__seamless__.get_list("admin.notes")

356 

@property
def notes_except_flags(self):
    """Notes that have no ``flag``, or whose flag has no truthy ``assigned_to``."""
    plain = []
    for entry in self.notes:
        flag = entry.get("flag")
        if flag and flag.get("assigned_to"):
            continue
        plain.append(entry)
    return plain


@property
def flags(self):
    """Notes carrying a ``flag`` with a truthy ``assigned_to``."""
    flagged = []
    for entry in self.notes:
        flag = entry.get("flag")
        if flag and flag.get("assigned_to"):
            flagged.append(entry)
    return flagged


@property
def is_flagged(self):
    """True when ``flags`` is non-empty."""
    return len(self.flags) != 0

368 

def resolve_flag(self, flag_id, updated_note):
    """Replace the flagged note ``flag_id`` with an ordinary note containing ``updated_note``.

    The replacement keeps the original date, id and author but is added
    without assignee or deadline, so it no longer carries a flag.
    """
    flagged = self.get_note_by_id(flag_id)
    self.remove_note_by_id(flag_id)
    created, author = flagged.get("date"), flagged.get("author_id")
    self.add_note(updated_note, created, flag_id, author)

373 

@property
def most_urgent_flag_deadline(self):
    """Earliest deadline among the flags, or ``dates.far_in_the_future()`` if none has one.

    Only one flag per record is allowed at present, but any number is handled.
    """
    deadlines = []
    for flagged in self.flags:
        deadline = flagged["flag"].get("deadline")
        if deadline:
            deadlines.append(deadline)

    if len(deadlines) == 0:
        # dummy date standing in for "least urgent"
        return dates.far_in_the_future()

    return find_earliest_date(deadlines, dates_format=FMT_DATE_STD)

390 

@property
def ordered_notes(self):
    """All notes, ordered newest first by ``_order_notes``."""
    return self._order_notes(self.notes)


@property
def ordered_notes_except_flags(self):
    """``notes_except_flags``, ordered by ``_order_notes``."""
    return self._order_notes(self.notes_except_flags)

401 

def _order_notes(self, notes):
    """Return ``notes`` sorted by ``date`` descending; same-date notes come out in reverse input order.

    A note without a ``date`` is given ``DEFAULT_TIMESTAMP_VAL`` in place
    (the dict passed in is modified).
    """
    pending = list(notes)
    for entry in pending:
        if "date" not in entry:
            # a missing date means note date setting is broken somewhere and needs fixing
            entry["date"] = DEFAULT_TIMESTAMP_VAL

    # a descending stable sort keeps equal-date notes in the order it is given,
    # so feeding it the reversed input yields reverse input order per date
    return sorted(reversed(pending), key=lambda entry: entry["date"], reverse=True)

418 

419 #### end of notes methods 

420 

def bibjson(self):
    """Return the record's bibjson wrapped in ``JournalLikeBibJSON``.

    When nothing is stored yet, an empty dict is stored first and then wrapped.
    """
    stored = self.__seamless__.get_single("bibjson")
    if stored is None:
        self.__seamless__.set_single("bibjson", {})
        stored = self.__seamless__.get_single("bibjson")
    return JournalLikeBibJSON(stored)


def set_bibjson(self, bibjson):
    """Store ``bibjson``; a ``JournalLikeBibJSON`` is unwrapped to its ``data`` first."""
    if isinstance(bibjson, JournalLikeBibJSON):
        payload = bibjson.data
    else:
        payload = bibjson
    self.__seamless__.set_with_struct("bibjson", payload)

431 

432 ###################################################### 

433 ## DEPRECATED METHODS 

434 

def known_issns(self):
    """
    DEPRECATED

    All ISSNs this journal is known by.

    This once meant "every ISSN the journal has ever been known by"; since
    continuations were separated from the single journal object model it is
    simply a proxy for self.bibjson().issns()
    """
    current = self.bibjson()
    return current.issns()


def get_latest_contact_name(self):
    """DEPRECATED: alias for the ``contact_name`` property."""
    return self.contact_name


def get_latest_contact_email(self):
    """DEPRECATED: alias for the ``contact_email`` property."""
    return self.contact_email


def add_contact(self, name, email):
    """DEPRECATED: delegates to ``set_contact``."""
    self.set_contact(name, email)


def remove_contacts(self):
    """DEPRECATED: delegates to ``remove_contact``."""
    self.remove_contact()

459 

460 ###################################################### 

461 ## internal utility methods 

462 

def _generate_index(self):
    """Recompute the ``index`` section from the bibjson, the notes and the editorial fields.

    Gathers titles, subjects, classification terms and codes, languages,
    country, licence types, flag details and the Yes/No markers, then writes
    the result with ``set_with_struct("index", ...)``.  ``issn``,
    ``continued``, ``has_editor_group`` and ``has_editor`` are always
    written; the other entries only when they hold a value.
    """
    cbib = self.bibjson()

    # titles: the main title, then the alternative title when set
    titles = []
    if cbib.title is not None:
        titles.append(cbib.title)
    if cbib.alternative_title:
        titles.append(cbib.alternative_title)

    # subjects, plus scheme-qualified terms and codes
    subjects = []
    schema_subjects = []
    schema_codes = []
    classification = []
    for subs in cbib.subject:
        scheme = subs.get("scheme")
        term = subs.get("term")
        if term:
            subjects.append(term)
            schema_subjects.append(scheme + ":" + term)
            classification.append(term)
        # NOTE(review): assumed to apply whether or not a term is present - confirm
        if "code" in subs:
            schema_codes.append(scheme + ":" + subs.get("code"))

    # widen the classification with the parent terms of each term
    parents = []
    for term in classification:
        path = cbib.term_path(term)
        if path is not None:
            parents += path
    classification += parents

    # keywords count as subjects but not as classification
    subjects += cbib.keywords

    # language and country in their English forms, as provided by the bibjson
    langs = cbib.language_name()
    country = cbib.country_name()

    licence_types = [licence.get("type") for licence in cbib.licences]

    # flag information
    is_flagged = self.is_flagged
    flag_assignees = [
        note["flag"]["assigned_to"]
        for note in self.notes
        if "assigned_to" in note.get("flag", {}) and note["flag"]["assigned_to"]
    ]
    most_urgent_flag_deadline = self.most_urgent_flag_deadline

    # deduplicate the lists
    titles = list(set(titles))
    subjects = list(set(subjects))
    schema_subjects = list(set(schema_subjects))
    classification = list(set(classification))
    licence_types = list(set(licence_types))
    schema_codes = list(set(schema_codes))

    # full classification paths and code tree for the subjects
    classification_paths = cbib.lcc_paths()
    schema_codes_tree = cbib.lcc_codes_full_list()

    # title without punctuation, newlines or tabs, and its ASCII transliteration
    unpunctitle = None
    asciiunpunctitle = None
    if cbib.title is not None:
        throwlist = string.punctuation + '\n\t'
        unpunctitle = "".join(c for c in cbib.title if c not in throwlist).strip()
        try:
            asciiunpunctitle = unidecode(unpunctitle)
        except Exception:
            # best effort: keep the untransliterated title, but no longer
            # swallow KeyboardInterrupt/SystemExit as the bare except did
            asciiunpunctitle = unpunctitle

    # a record is a continuation when it replaces, or is replaced by, another
    is_continuation = len(cbib.replaces) > 0 or len(cbib.is_replaced_by) > 0
    continued = "Yes" if is_continuation else "No"
    has_editor_group = "Yes" if self.editor_group is not None else "No"
    has_editor = "Yes" if self.editor is not None else "No"

    # build the index part of the object
    index = {}

    if country is not None:
        index["country"] = country
    if unpunctitle is not None:
        index["unpunctitle"] = unpunctitle
    if asciiunpunctitle is not None:
        index["asciiunpunctitle"] = asciiunpunctitle
    if is_flagged:
        index["is_flagged"] = is_flagged
        index["flag_assignees"] = flag_assignees
    # NOTE(review): assumed to be written for unflagged records too - confirm
    if most_urgent_flag_deadline:
        index["most_urgent_flag_deadline"] = most_urgent_flag_deadline
    index["continued"] = continued
    index["has_editor_group"] = has_editor_group
    index["has_editor"] = has_editor

    index["issn"] = cbib.issns()
    if len(titles) > 0:
        index["title"] = titles
    if len(subjects) > 0:
        index["subject"] = subjects
    if len(schema_subjects) > 0:
        index["schema_subject"] = schema_subjects
    if len(classification) > 0:
        index["classification"] = classification
    if len(langs) > 0:
        index["language"] = langs
    if len(licence_types) > 0:
        index["license"] = licence_types
    if len(classification_paths) > 0:
        index["classification_paths"] = classification_paths
    if len(schema_codes) > 0:
        index["schema_code"] = schema_codes
    if len(schema_codes_tree) > 0:
        index["schema_codes_tree"] = schema_codes_tree

    self.__seamless__.set_with_struct("index", index)

606 

607 

608class Journal(JournalLikeObject): 

609 __type__ = "journal" 

610 

611 __SEAMLESS_STRUCT__ = [ 

612 shared_structs.JOURNAL_BIBJSON, 

613 shared_structs.SHARED_JOURNAL_LIKE, 

614 JOURNAL_STRUCT 

615 ] 

616 

617 __SEAMLESS_COERCE__ = COERCE_MAP 

618 

def __init__(self, **kwargs):
    """Build a Journal from raw record fields, or from an ES hit carrying ``_source``."""
    # FIXME: hack, to deal with ES integration layer being improperly abstracted
    raw = kwargs["_source"] if "_source" in kwargs else kwargs
    # FIXME: add_autogenerated_fields(**kwargs) used to be applied here; it is
    # left out for the moment, pending a decision on whether it should be done
    super(Journal, self).__init__(raw=raw)

627 

@classmethod
def add_autogenerated_fields(cls, **kwargs):
    """Fill in the derived ``has_*`` style flags on ``kwargs["bibjson"]`` in place.

    For each of ``apc``, ``deposit_policy``, ``other_charges``, ``copyright``,
    ``pid_scheme`` and ``preservation``: when the section is present its flag
    is derived from the section's content, otherwise the section is replaced
    by a dict holding only the flag set to False.

    Differences from the previous behaviour: a missing ``preservation``
    section now yields ``has_preservation: False`` like every other section
    (it used to yield True), and the optional ``url`` / ``national_library``
    entries are read with ``.get`` so their absence no longer raises KeyError.

    :param kwargs: must contain ``bibjson``, a dict that is modified in place
    """
    bib = kwargs["bibjson"]

    if "apc" in bib and bib["apc"] != '':
        bib["apc"]["has_apc"] = len(bib["apc"]["max"]) != 0
    else:
        bib["apc"] = {"has_apc": False}

    if "deposit_policy" in bib and bib["deposit_policy"] != []:
        bib["deposit_policy"]["has_policy"] = True
    else:
        # change made in https://github.com/DOAJ/doaj/commit/e507123f423fe16fd270744055da0129e2b32005
        bib["deposit_policy"] = {"has_policy": False}

    if "other_charges" in bib and bib["other_charges"] != '':
        bib["other_charges"]["has_other_charges"] = bib["other_charges"].get("url") is not None
    else:
        bib["other_charges"] = {"has_other_charges": False}

    if "copyright" in bib and bib["copyright"].get("url") != '':
        bib["copyright"]["author_retains"] = bib["copyright"].get("url") is not None
    else:
        bib["copyright"] = {"author_retains": False}

    if "pid_scheme" in bib and bib["pid_scheme"] != '':
        bib["pid_scheme"]["has_pid_scheme"] = len(bib["pid_scheme"]["scheme"]) != 0
    else:
        bib["pid_scheme"] = {"has_pid_scheme": False}

    if "preservation" in bib and bib["preservation"] != '':
        bib["preservation"]["has_preservation"] = (len(bib["preservation"]) != 0 or
                                                   bib.get("national_library") is not None)
    else:
        bib["preservation"] = {"has_preservation": False}

657 

658 ##################################################### 

659 ## Journal-specific data access methods 

660 

@classmethod
def all_in_doaj(cls, page_size=5000):
    """Iterate, via ``cls.iterate``, over the results of ``JournalQuery().all_in_doaj()``.

    Results are fetched ``page_size`` at a time with ``wrap=True`` and a
    ``'5m'`` keepalive.
    """
    in_doaj_query = JournalQuery().all_in_doaj()
    return cls.iterate(in_doaj_query, page_size=page_size, wrap=True, keepalive='5m')

665 

@classmethod
def find_by_publisher(cls, publisher, exact=True):
    """Run ``PublisherQuery`` and return the hits as ``cls`` instances.

    :param publisher: publisher value handed to ``PublisherQuery``
    :param exact: handed to ``PublisherQuery`` unchanged
    :return: list of records built from each hit's ``_source``
    """
    response = cls.query(q=PublisherQuery(publisher, exact).query())
    hits = response.get("hits", {}).get("hits", [])
    # build with cls, as the finders on the base class do, so that a subclass
    # calling this gets instances of itself rather than plain Journal objects
    return [cls(**hit.get("_source")) for hit in hits]

672 

@classmethod
def find_by_title(cls, title):
    """Run ``TitleQuery`` and return the hits as ``cls`` instances.

    :param title: title value handed to ``TitleQuery``
    :return: list of records built from each hit's ``_source``
    """
    response = cls.query(q=TitleQuery(title).query())
    hits = response.get("hits", {}).get("hits", [])
    # build with cls, as the finders on the base class do, so that a subclass
    # calling this gets instances of itself rather than plain Journal objects
    return [cls(**hit.get("_source")) for hit in hits]

679 

@classmethod
def delete_selected(cls, query, articles=False, snapshot_journals=True, snapshot_articles=True):
    """Delete every journal matched by ``query``, optionally with its articles.

    :param query: query selecting the journals to delete
    :param articles: when true, first delete the articles carrying the
        journals' ISSNs
    :param snapshot_journals: when true, call ``snapshot()`` on each journal
        before deletion
    :param snapshot_articles: passed to ``Article.delete_by_issns`` as ``snapshot``
    """
    if articles:
        # imported here rather than at module level
        from portality.models import Article

        # every ISSN of the selected journals identifies articles to remove
        doomed_issns = cls.issns_by_query(query)
        Article.delete_by_issns(doomed_issns, snapshot=snapshot_articles)

    if snapshot_journals:
        # snapshot each journal record before it is deleted
        for journal in cls.iterate(query, page_size=1000):
            journal.snapshot()

    # finally issue the delete request against the journals themselves
    cls.delete_by_query(query)

698 

@classmethod
def add_mapping_extensions(cls, default_mappings: dict):
    """Return a copy of ``default_mappings`` with the configured extensions merged in.

    Extensions are read from the ``DATAOBJ_TO_MAPPING_COPY_TO_EXTENSIONS``
    config entry.  Only keys already present in ``default_mappings`` are
    extended; on clashes the extension's entries win.  ``default_mappings``
    itself is not modified.  When the config entry is unset the copy is
    returned unchanged, rather than failing on ``None.items()``.

    :param default_mappings: mapping of key -> dict of mapping options
    :return: deep copy of ``default_mappings`` with extensions applied
    """
    merged = deepcopy(default_mappings)
    extensions = app.config.get("DATAOBJ_TO_MAPPING_COPY_TO_EXTENSIONS") or {}
    for key, extra in extensions.items():
        if key in merged:
            merged[key] = {**merged[key], **extra}
    return merged

707 

def all_articles(self):
    """Return ``Article.find_by_issns`` for this journal's known ISSNs."""
    from portality.models import Article
    issns = self.known_issns()
    return Article.find_by_issns(issns)


def article_stats(self):
    """Return ``{"total", "latest"}`` from an ``ArticleStatsQuery`` over the known ISSNs.

    ``total`` is the hit count reported by the query; ``latest`` is the
    ``created_date`` of the first hit, or None when there are no hits.
    """
    from portality.models import Article
    stats_query = ArticleStatsQuery(self.known_issns())
    response = Article.query(q=stats_query.query())
    hits = response.get("hits", {})
    total = hits.get("total", {}).get('value', 0)
    if total > 0:
        latest = hits.get("hits", [])[0].get("_source").get("created_date")
    else:
        latest = None
    return {"total": total, "latest": latest}


def mappings(self):
    """Create the ES mapping from the raw seamless struct and ``MAPPING_OPTS``."""
    raw_struct = self.__seamless_struct__.raw
    return es_data_mapping.create_mapping(raw_struct, MAPPING_OPTS)

728 

729 ############################################ 

730 ## base property methods 

731 

@property
def toc_id(self):
    """The bibjson's preferred ISSN, falling back to the record id when that is falsy."""
    return self.bibjson().get_preferred_issn() or self.id

738 

739 ############################################################ 

740 ## revision history methods 

741 

def snapshot(self):
    """Save a ``JournalHistory`` record built from a copy of this journal's data.

    In the copy, ``id`` is moved to ``about`` and the ``index``,
    ``last_updated`` and ``created_date`` entries are dropped.
    """
    from portality.models import JournalHistory

    snap = deepcopy(self.data)
    if "id" in snap:
        snap["about"] = snap.pop("id")
    for transient in ("index", "last_updated", "created_date"):
        snap.pop(transient, None)

    JournalHistory(**snap).save()

758 

759 ####################################################################### 

760 ## Conversion methods 

761 

def make_continuation(self, type, eissn=None, pissn=None, title=None):
    """Create, save and return a new Journal that continues (or is continued by) this one.

    The continuation starts as a copy of this journal's data without id,
    created date or last-updated date. It is taken out of DOAJ, stripped of
    its ISSNs and continuation details, and given the supplied identifiers
    and title. Both records are then cross-linked and saved.

    :param type: ``"replaces"`` (this journal replaces the new record) or
        ``"is_replaced_by"`` (this journal is replaced by the new record)
    :param eissn: electronic ISSN of the continuation
    :param pissn: print ISSN of the continuation
    :param title: title of the continuation; this journal's title is kept when None
    :return: the saved continuation
    :raises ContinuationException: if ``type`` is not recognised, or if
        neither ``eissn`` nor ``pissn`` holds a value.  Empty strings count
        as missing, matching how identifiers are written further down;
        previously ``""`` passed this check and produced a continuation
        with no ISSN at all.
    """
    if type not in ("replaces", "is_replaced_by"):
        raise ContinuationException("type must be one of 'replaces' or 'is_replaced_by'")

    has_eissn = eissn is not None and eissn != ""
    has_pissn = pissn is not None and pissn != ""
    if not (has_eissn or has_pissn):
        raise ContinuationException("You must create a continuation with at least one issn")

    # take a copy of the raw data for this journal, and the issns for this journal
    raw_cont = deepcopy(self.data)
    bibjson = self.bibjson()
    issns = bibjson.issns()
    cissns = []

    # make a new instance of the journal - this will be our continuation
    for field in ("id", "created_date", "last_updated"):
        del raw_cont[field]
    j = Journal(**raw_cont)

    # ensure that the journal is NOT in doaj.  That will be for the admin to decide
    j.set_in_doaj(False)

    # get the continuation's bibjson, then remove the existing issns
    cbj = j.bibjson()
    del cbj.eissn
    del cbj.pissn

    # also remove any existing continuation information
    del cbj.replaces
    del cbj.is_replaced_by
    del cbj.discontinued_date

    # now write the new identifiers
    if has_eissn:
        cissns.append(eissn)
        cbj.eissn = eissn
    if has_pissn:
        cissns.append(pissn)
        cbj.pissn = pissn

    # update the title
    if title is not None:
        cbj.title = title

    # The relationship type is expressed from this journal's point of view,
    # so the continuation receives the opposite relationship.
    if type == "replaces":
        # this journal replaces the new continuation
        bibjson.replaces = cissns
        cbj.is_replaced_by = issns
    elif type == "is_replaced_by":
        # this journal is replaced by the new continuation
        bibjson.is_replaced_by = cissns
        cbj.replaces = issns

    # save this journal, then the continuation, and hand the latter back
    self.save()
    j.save()
    return j

829 

830 #################################################### 

831 ## admin data methods 

832 

def is_in_doaj(self):
    """Stored ``admin.in_doaj`` value, False when unset."""
    return self.__seamless__.get_single("admin.in_doaj", default=False)


def set_in_doaj(self, value):
    """Store ``value`` as ``admin.in_doaj``."""
    self.__seamless__.set_with_struct("admin.in_doaj", value)


def is_ticked(self):
    """Stored ``admin.ticked`` value, False when unset."""
    return self.__seamless__.get_single("admin.ticked", default=False)


def set_ticked(self, ticked):
    """Store ``ticked`` as ``admin.ticked``."""
    self.__seamless__.set_with_struct("admin.ticked", ticked)


@property
def current_application(self):
    """Stored ``admin.current_application`` value."""
    return self.__seamless__.get_single("admin.current_application")


def set_current_application(self, application_id):
    """Store ``application_id`` as ``admin.current_application``."""
    self.__seamless__.set_with_struct("admin.current_application", application_id)


def remove_current_application(self):
    """Delete ``admin.current_application``."""
    self.__seamless__.delete("admin.current_application")

854 

855 # Related Applications Functions 

856 ########### 

857 

@property
def related_applications(self):
    """The list stored at ``admin.related_applications``."""
    return self.__seamless__.get_list("admin.related_applications")


def add_related_application(self, application_id, date_accepted=None, status=None):
    """Add a related-application record, replacing any existing one for ``application_id``.

    ``date_accepted`` and ``status`` are only stored when not None.
    """
    record = {"application_id": application_id}
    # existing entries are matched on the application id alone
    self.__seamless__.delete_from_list("admin.related_applications", matchsub=record)
    optional = (("date_accepted", date_accepted), ("status", status))
    record.update((field, value) for field, value in optional if value is not None)
    self.__seamless__.add_to_list_with_struct("admin.related_applications", record)


def set_related_applications(self, related_applications_records):
    """Replace ``admin.related_applications`` with the given records."""
    self.__seamless__.set_with_struct("admin.related_applications", related_applications_records)


def remove_related_applications(self):
    """Delete ``admin.related_applications`` entirely."""
    self.__seamless__.delete("admin.related_applications")


def remove_related_application(self, application_id):
    """Store the related applications again without those for ``application_id``."""
    kept = [rec for rec in self.related_applications
            if rec.get("application_id") != application_id]
    self.set_related_applications(kept)


def related_application_record(self, application_id):
    """Return the first related-application record for ``application_id``, or None."""
    matches = (rec for rec in self.related_applications
               if rec.get("application_id") == application_id)
    return next(matches, None)

886 

def latest_related_application_id(self):
    """Return the ``application_id`` of the related application accepted most recently.

    Records without ``date_accepted`` are ranked as ``DEFAULT_TIMESTAMP_VAL``.
    Previously the result of ``sorted()`` was discarded, so the first stored
    record was returned whatever its date.

    :return: the application id, or None when there are no related applications
    """
    related = self.related_applications
    if len(related) == 0:
        return None

    def _accepted(record):
        # the default is only looked up for records lacking the field
        if "date_accepted" in record:
            return record["date_accepted"]
        return DEFAULT_TIMESTAMP_VAL

    return max(related, key=_accepted).get("application_id")

895 

@property
def last_update_request(self):
    """Most recent ``date_accepted`` among the related applications.

    Records without ``date_accepted`` count as ``DEFAULT_TIMESTAMP_VAL``.
    The latest date is picked explicitly, so the result does not depend on
    the order in which the records are supplied.

    :return: the latest acceptance date, or None when there are no related applications
    """
    related = self.related_applications_ordered
    if not related:
        return None
    return max(
        rec["date_accepted"] if "date_accepted" in rec else DEFAULT_TIMESTAMP_VAL
        for rec in related
    )

902 

@property
def related_applications_ordered(self):
    """Related applications sorted by ``date_accepted``, most recent first.

    Records without ``date_accepted`` are ranked as ``DEFAULT_TIMESTAMP_VAL``.
    Previously the result of ``sorted()`` was discarded and the list came
    back in stored order.  Newest-first is used because
    ``last_update_request`` reads element ``[0]`` as the latest record.

    :return: a new sorted list, or None when there are no related applications
    """
    related = self.related_applications
    if len(related) == 0:
        return None

    def _accepted(record):
        # the default is only looked up for records lacking the field
        if "date_accepted" in record:
            return record["date_accepted"]
        return DEFAULT_TIMESTAMP_VAL

    return sorted(related, key=_accepted, reverse=True)

910 

911 ######## 

912 

913 @property 

914 def last_full_review(self): 

915 return self.__seamless__.get_single("admin.last_full_review") 

916 

917 @property 

918 def last_full_review_timestamp(self): 

919 return self.__seamless__.get_single("admin.last_full_review", coerce=coerce.to_datestamp()) 

920 

921 @last_full_review.setter 

922 def last_full_review(self, value): 

923 self.__seamless__.set_with_struct("admin.last_full_review", value) 

924 

@property
def last_withdrawn(self):
    """The value stored at ``admin.last_withdrawn``, as returned by ``get_single``."""
    seamless = self.__seamless__
    return seamless.get_single("admin.last_withdrawn")

@property
def last_withdrawn_timestamp(self):
    """
    ``admin.last_withdrawn`` read through the coerce function produced by
    ``coerce.to_datestamp()``.
    """
    as_datestamp = coerce.to_datestamp()
    return self.__seamless__.get_single("admin.last_withdrawn", coerce=as_datestamp)

@last_withdrawn.setter
def last_withdrawn(self, value):
    """Store ``value`` at ``admin.last_withdrawn`` via ``set_with_struct``."""
    seamless = self.__seamless__
    seamless.set_with_struct("admin.last_withdrawn", value)

936 

@property
def last_reinstated(self):
    """The value stored at ``admin.last_reinstated``, as returned by ``get_single``."""
    seamless = self.__seamless__
    return seamless.get_single("admin.last_reinstated")

@property
def last_reinstated_timestamp(self):
    """
    ``admin.last_reinstated`` read through the coerce function produced by
    ``coerce.to_datestamp()``.
    """
    as_datestamp = coerce.to_datestamp()
    return self.__seamless__.get_single("admin.last_reinstated", coerce=as_datestamp)

@last_reinstated.setter
def last_reinstated(self, value):
    """Store ``value`` at ``admin.last_reinstated`` via ``set_with_struct``."""
    seamless = self.__seamless__
    seamless.set_with_struct("admin.last_reinstated", value)

948 

@property
def last_owner_transfer(self):
    """The value stored at ``admin.last_owner_transfer``, as returned by ``get_single``."""
    seamless = self.__seamless__
    return seamless.get_single("admin.last_owner_transfer")

@property
def last_owner_transfer_timestamp(self):
    """
    ``admin.last_owner_transfer`` read through the coerce function produced
    by ``coerce.to_datestamp()``.
    """
    as_datestamp = coerce.to_datestamp()
    return self.__seamless__.get_single("admin.last_owner_transfer", coerce=as_datestamp)

@last_owner_transfer.setter
def last_owner_transfer(self, value):
    """Store ``value`` at ``admin.last_owner_transfer`` via ``set_with_struct``."""
    seamless = self.__seamless__
    seamless.set_with_struct("admin.last_owner_transfer", value)

960 

961 ######################################################################## 

962 ## Functions for handling continuations 

963 

964 

def _get_continuations(self, issns,
                       get_sub_journals: Callable,
                       journal_caches: set[str] = None) -> Iterable['Journal']:
    """
    Collect the journals reachable from this one along a chain of
    continuations.

    Parameters
    ----------
    issns
        ISSNs to look up with ``ContinuationQuery``; the journals found are
        this journal's direct continuations
    get_sub_journals
        callable invoked as ``get_sub_journals(journal, journal_caches)`` for
        each newly found journal; it returns that journal's own continuations
    journal_caches
        ids of the journals already visited.  This journal's id and the ids
        of newly found journals are added to it in place, which prevents
        infinite recursion when continuation data is cyclic.  A new set is
        created only when ``None`` is passed, so a caller-supplied set - even
        an empty one - is the set that gets updated.

    Returns
    -------
    The newly found journals followed by everything ``get_sub_journals``
    returned for them.
    """
    # `journal_caches or set()` would silently swap out an empty set passed by
    # the caller, so test for None explicitly
    if journal_caches is None:
        journal_caches = set()
    journal_caches.add(self.id)

    candidates = self.q2obj(q=ContinuationQuery(issns).query())
    journals = [j for j in candidates if j.id not in journal_caches]
    # register the whole level before descending, so siblings are not
    # rediscovered through one another
    journal_caches.update(j.id for j in journals)

    subjournals = []
    for j in journals:
        subjournals += get_sub_journals(j, journal_caches)

    return journals + subjournals

989 

def get_future_continuations(self, journal_caches: set[str]=None) -> Iterable['Journal']:
    """
    Follow the ``is_replaced_by`` values of this journal's bibjson forwards,
    recursing through ``get_future_continuations`` on every journal found.

    :param journal_caches: ids of journals already visited, handed on to
        ``_get_continuations``
    :return: whatever ``_get_continuations`` returns
    """
    def descend(journal, visited):
        return journal.get_future_continuations(visited)

    replaced_by = self.bibjson().is_replaced_by
    return self._get_continuations(replaced_by, descend, journal_caches=journal_caches)

994 

def get_past_continuations(self, journal_caches: set[str]=None) -> Iterable['Journal']:
    """
    Follow the ``replaces`` values of this journal's bibjson backwards,
    recursing through ``get_past_continuations`` on every journal found.

    :param journal_caches: ids of journals already visited, handed on to
        ``_get_continuations``
    :return: whatever ``_get_continuations`` returns
    """
    def descend(journal, visited):
        return journal.get_past_continuations(visited)

    replaced = self.bibjson().replaces
    return self._get_continuations(replaced, descend, journal_caches=journal_caches)

999 

1000 ####################################################################### 

1001 

1002 ##################################################### 

1003 ## operations we can do to the journal 

1004 

def calculate_tick(self):
    """
    Decide whether this journal is ticked and record the answer with
    ``set_ticked``.

    The threshold comes from the ``TICK_THRESHOLD`` config entry (default
    '2014-03-19T00:00:00Z').

    * No ``created_date`` yet: ticked exactly when the current time has
      reached the threshold.
    * Otherwise: ticked when the journal is in DOAJ and either its created
      date or its last update request is on/after the threshold.
    """
    created_date = self.created_date
    last_update_request = self.last_update_request

    threshold = dates.parse(app.config.get("TICK_THRESHOLD", '2014-03-19T00:00:00Z'))

    if created_date is None:
        # The record has not been saved yet, so there cannot be an update
        # request either; all that matters is whether the threshold is
        # already in the past.
        threshold_passed = dates.now() >= threshold
        self.set_ticked(True if threshold_passed else False)
        return

    # existing record: turn the stored strings into datetime objects
    created = dates.parse(created_date)
    lud = dates.parse(last_update_request) if last_update_request is not None else None

    recent_enough = created >= threshold or (lud is not None and lud >= threshold)
    if recent_enough and self.is_in_doaj():
        self.set_ticked(True)
    else:
        self.set_ticked(False)

1039 

def propagate_in_doaj_status_to_articles(self):
    """
    Copy this journal's in-DOAJ flag onto each article returned by
    ``all_articles()`` and save every article in turn.
    """
    for art in self.all_articles():
        status = self.is_in_doaj()
        art.set_in_doaj(status)
        art.save()

1044 

def prep(self, is_update=True):
    """
    Run the pre-save steps in their fixed order: ensure the in-DOAJ flag,
    calculate the tick, generate the index, calculate has_apc and generate
    the autocompletes.

    :param is_update: when truthy, finish by calling ``set_last_updated()``
    """
    steps = (
        self._ensure_in_doaj,
        self.calculate_tick,
        self._generate_index,
        self._calculate_has_apc,   # reads the tick, so must follow calculate_tick
        self._generate_autocompletes,
    )
    for step in steps:
        step()

    if is_update:
        self.set_last_updated()

1053 

def save(self, snapshot=True, sync_owner=True, update_last_updated=True, **kwargs):
    """
    Prepare, validate and persist this journal.

    :param snapshot: when truthy, call ``self.snapshot()`` after the save
    :param sync_owner: when truthy, call ``_sync_owner_to_application()``
        before the save
    :param update_last_updated: passed to ``prep`` as ``is_update`` and also
        forwarded to the parent class's ``save``
    :param kwargs: forwarded unchanged to the parent class's ``save``
    :return: whatever the parent class's ``save`` returns
    """
    # derived fields are (re)computed first so that validation sees them
    self.prep(is_update=update_last_updated)
    self.verify_against_struct()
    if sync_owner:
        self._sync_owner_to_application()
    res = super(Journal, self).save(update_last_updated=update_last_updated, **kwargs)
    # the snapshot is only taken once the parent save has returned
    if snapshot:
        self.snapshot()
    return res

1063 

1064 ###################################################### 

1065 ## internal utility methods 

1066 

def _generate_autocompletes(self):
    """
    Write lower-cased copies of the bibjson publisher and institution to
    ``index.publisher_ac`` and ``index.institution_ac``.  A value that is
    ``None`` is skipped and its index field is left as it was.
    """
    bj = self.bibjson()
    targets = (
        ("index.publisher_ac", bj.publisher),
        ("index.institution_ac", bj.institution),
    )
    for path, value in targets:
        if value is not None:
            self.__seamless__.set_with_struct(path, value.lower())

1077 

def _ensure_in_doaj(self):
    """
    Make sure ``admin.in_doaj`` has a value: when it reads back as ``None``
    it is set to ``False``; any existing value is left untouched.
    """
    current = self.__seamless__.get_single("admin.in_doaj", default=None)
    if current is not None:
        return
    self.set_in_doaj(False)

1081 

def _sync_owner_to_application(self):
    """
    Give this journal's current application the same owner as the journal.

    Does nothing when there is no current application, when the application
    cannot be pulled, or when the owners already match.  The application is
    saved with ``sync_owner=False``.
    """
    application_id = self.current_application
    if application_id is None:
        return

    # imported here rather than at module level
    # NOTE(review): presumably to avoid a circular import - confirm
    from portality.models.v2.application import Application

    current = Application.pull(application_id)
    if current is None:
        return
    if current.owner != self.owner:
        current.set_owner(self.owner)
        current.save(sync_owner=False)

1090 

def _calculate_has_apc(self):
    """
    Work out whether the journal has an APC and store the answer in
    ``index.has_apc`` as one of "Yes", "No" or "No Information".
    """
    if self.bibjson().has_apc:
        label = "Yes"
    elif self.is_ticked():
        label = "No"
    else:
        # not ticked and no APC recorded: we want to say "No Information"
        label = "No Information"

    self.__seamless__.set_with_struct("index.has_apc", label)

1101 

1102 

# Mapping options for the Journal type; everything except "dynamic" is drawn
# from application config.
# NOTE(review): presumably consumed when the index mapping is generated - confirm.
MAPPING_OPTS = {
    "dynamic": None,
    # config defaults passed through Journal.add_mapping_extensions
    "coerces": Journal.add_mapping_extensions(app.config["DATAOBJ_TO_MAPPING_DEFAULTS"]),
    # merged dict: on a key clash the JOURNAL_EXCEPTION_MAPPING entry wins,
    # because it is unpacked last
    "exceptions": {**app.config["ADMIN_NOTES_SEARCH_MAPPING"], **app.config["JOURNAL_EXCEPTION_MAPPING"]},
    "additional_mappings": app.config["ADMIN_NOTES_INDEX_ONLY_FIELDS"]
}

1109 

1110 

1111######################################################## 

1112## Data Access Queries 

1113 

class JournalQuery(object):
    """
    Builders for the query bodies we run against the journal type.

    ``find_by_issn`` and ``find_by_issn_exact`` store their result on
    ``self.query``; ``all_in_doaj`` returns its query instead.
    """
    # template for find_by_issn; the "<issn>" placeholder is replaced by the ISSNs
    issn_query = {
        "track_total_hits": True,
        "query": {
            "bool": {
                "must": [
                    {"terms": {"index.issn.exact": "<issn>"}}
                ]
            }
        }
    }

    # template for find_by_issn_exact; one "term" clause is appended per ISSN
    must_query = {
        "track_total_hits": True,
        "query": {
            "bool": {
                "must": []
            }
        }
    }

    # template for all_in_doaj
    all_doaj = {
        "track_total_hits": True,
        "query": {
            "bool": {
                "must": [
                    {"term": {"admin.in_doaj": True}}
                ]
            }
        }
    }

    # fields requested by all_in_doaj when the instance is minified
    _minified_fields = ["id", "bibjson.title", "last_updated"]

    def __init__(self, minified=False, sort_by_title=False):
        """
        :param minified: when truthy, ``all_in_doaj`` asks only for ``_minified_fields``
        :param sort_by_title: when truthy, ``all_in_doaj`` sorts by
            ``bibjson.title.exact`` ascending
        """
        self.query = None
        self.minified = minified
        self.sort_by_title = sort_by_title

    def find_by_issn(self, issns, in_doaj=None, max=10):
        """
        Set ``self.query`` to a query for journals carrying any of ``issns``.

        :param issns: ISSNs placed in the "terms" clause
        :param in_doaj: when not None, additionally require this ``admin.in_doaj`` value
        :param max: value for the query's "size"
        """
        body = deepcopy(self.issn_query)
        clauses = body["query"]["bool"]["must"]
        clauses[0]["terms"]["index.issn.exact"] = issns
        if in_doaj is not None:
            clauses.append({"term": {"admin.in_doaj": in_doaj}})
        body["size"] = max
        self.query = body

    def find_by_issn_exact(self, issns, in_doaj=None, max=10):
        """
        Set ``self.query`` to a query for journals carrying every one of
        ``issns`` (one "must" term clause per ISSN).

        :param issns: ISSNs, each of which must match
        :param in_doaj: when not None, additionally require this ``admin.in_doaj`` value
        :param max: value for the query's "size"
        """
        body = deepcopy(self.must_query)
        clauses = body["query"]["bool"]["must"]
        clauses.extend({"term": {"index.issn.exact": issn}} for issn in issns)
        if in_doaj is not None:
            clauses.append({"term": {"admin.in_doaj": in_doaj}})
        body["size"] = max
        self.query = body

    def all_in_doaj(self):
        """
        Build and return the query for everything with ``admin.in_doaj`` true,
        honouring the ``minified`` and ``sort_by_title`` settings.
        """
        body = deepcopy(self.all_doaj)
        if self.minified:
            body["fields"] = self._minified_fields
        if self.sort_by_title:
            body["sort"] = [{"bibjson.title.exact": {"order": "asc"}}]
        return body

1181 

1182 

class JournalURLQuery(object):
    """Query matching ``bibjson.ref.journal.exact`` against a URL."""

    def __init__(self, url, in_doaj=None, max=10):
        """
        :param url: URL to match
        :param in_doaj: when not None, additionally require this ``admin.in_doaj`` value
        :param max: value for the query's "size"
        """
        self.url = url
        self.in_doaj = in_doaj
        self.max = max

    def query(self):
        """Build and return the query body."""
        must = [{"match": {"bibjson.ref.journal.exact": self.url}}]
        if self.in_doaj is not None:
            must.append({"term": {"admin.in_doaj": self.in_doaj}})
        return {
            "track_total_hits": True,
            "query": {"bool": {"must": must}},
            "size": self.max
        }

1206 

1207 

class IssnQuery(object):
    """
    Aggregation query listing the values of an ISSN field across the
    records belonging to one owner.
    """

    def __init__(self, owner, in_doaj=None, issn_field=None):
        """
        :param owner: value required for ``admin.owner.exact``
        :param in_doaj: when not None, additionally require this ``admin.in_doaj`` value
        :param issn_field: field to aggregate on; 'index.issn.exact' when falsy
        """
        self._owner = owner
        self._in_doaj = in_doaj
        self._issn_field = issn_field or 'index.issn.exact'

    def query(self):
        """
        Build and return the query body: size 0, with a single "issns" terms
        aggregation (up to 10000 buckets, keys ascending).
        """
        filters = [{"term": {"admin.owner.exact": self._owner}}]
        if self._in_doaj is not None:
            filters.append({"term": {"admin.in_doaj": self._in_doaj}})

        issn_terms = {
            "field": self._issn_field,
            "size": 10000,
            "order": {"_key": "asc"}
        }
        return {
            "track_total_hits": True,
            "query": {"bool": {"must": filters}},
            "size": 0,
            "aggs": {"issns": {"terms": issn_terms}}
        }

1236 

1237 

class OwnerQuery(object):
    """ Query to supply all full journal sources by owner """
    # template; the placeholder is replaced by the owner id in __init__
    base_query = {
        "track_total_hits": True,
        "query": {
            "term": {"admin.owner.exact": "<owner id here>"}
        },
        "size": 10000,
    }

    def __init__(self, owner):
        """
        :param owner: value required for ``admin.owner.exact``
        """
        body = deepcopy(self.base_query)
        body["query"]["term"]["admin.owner.exact"] = owner
        self._query = body

    def query(self):
        """Return the query body built at construction (the same dict on every call)."""
        return self._query

1254 

1255 

class PublisherQuery(object):
    """Term query on the publisher name, against either the exact or the non-exact field."""
    # template used when exact=True
    exact_query = {
        "track_total_hits": True,
        "query": {
            "term": {"bibjson.publisher.name.exact": "<publisher name here>"}
        },
        "size": 10000
    }

    # template used when exact=False
    inexact_query = {
        "track_total_hits": True,
        "query": {
            "term": {"bibjson.publisher.name": "<publisher name here>"}
        },
        "size": 10000
    }

    def __init__(self, publisher, exact=True):
        """
        :param publisher: publisher name to look for
        :param exact: choose the ``.exact`` field (name used as given) or the
            plain field (name lower-cased)
        """
        self.publisher = publisher
        self.exact = exact

    def query(self):
        """Build and return the query body for the configured publisher."""
        if self.exact:
            template = self.exact_query
            field = "bibjson.publisher.name.exact"
            value = self.publisher
        else:
            template = self.inexact_query
            field = "bibjson.publisher.name"
            # the non-exact field is queried with the lower-cased name
            value = self.publisher.lower()

        body = deepcopy(template)
        body["query"]["term"][field] = value
        return body

1286 

1287 

class TitleQuery(object):
    """Term query on ``index.title.exact``."""
    # template; the placeholder is replaced by the title in query()
    base_query = {
        "track_total_hits": True,
        "query": {
            "term": {"index.title.exact": "<title here>"}
        },
        "size": 10000
    }

    def __init__(self, title):
        """
        :param title: value required for ``index.title.exact``
        """
        self.title = title

    def query(self):
        """Build and return a fresh query body for the configured title."""
        body = deepcopy(self.base_query)
        term = body["query"]["term"]
        term["index.title.exact"] = self.title
        return body

1304 

1305 

class ContinuationQuery(object):
    """Query for records whose ``index.issn.exact`` matches any of the given ISSNs."""

    def __init__(self, issns):
        """
        :param issns: ISSNs placed in the "terms" clause
        """
        self.issns = issns

    def query(self):
        """Build and return the query body (size 10000)."""
        issn_clause = {"terms": {"index.issn.exact": self.issns}}
        return {
            "track_total_hits": True,
            "query": {"bool": {"must": [issn_clause]}},
            "size": 10000
        }

1322 

1323 

class ArticleStatsQuery(object):
    """
    Query for the single most recently created in-DOAJ record matching any
    of the given ISSNs, returning only its ``created_date``.
    """

    def __init__(self, issns):
        """
        :param issns: ISSNs placed in the "terms" clause
        """
        self.issns = issns

    def query(self):
        """Build and return the query body (size 1, newest ``created_date`` first)."""
        must = [
            {"terms": {"index.issn.exact": self.issns}},
            {"term": {"admin.in_doaj": True}}
        ]
        return {
            "track_total_hits": True,
            "query": {"bool": {"must": must}},
            "size": 1,
            "_source": {"include": ["created_date"]},
            "sort": [{"created_date": {"order": "desc"}}]
        }

1345 

1346 

class RecentJournalsQuery(object):
    """Match-all query returning the most recently created records first."""

    def __init__(self, max):
        """
        :param max: value for the query's "size"
        """
        self.max = max

    def query(self):
        """Build and return the query body."""
        newest_first = [{"created_date": {"order": "desc"}}]
        return {
            "track_total_hits": True,
            "query": {"match_all": {}},
            "size": self.max,
            "sort": newest_first
        }