Coverage for portality/models/v2/journal.py: 92%

1from portality.dao import DomainObject

2from portality.core import app

3from portality.models.v2.bibjson import JournalLikeBibJSON

4from portality.models.v2 import shared_structs

5from portality.models.account import Account

6from portality.lib import es_data_mapping, dates, coerce

7from portality.lib.seamless import SeamlessMixin

8from portality.lib.coerce import COERCE_MAP

10from copy import deepcopy

11from datetime import datetime, timedelta

13import string, uuid

14from unidecode import unidecode

# Seamless struct fragment for the journal-specific parts of the record: the
# extra "admin" entries and the autocomplete entries under "index".
JOURNAL_STRUCT = {
    "objects": ["admin", "index"],
    "structs": {
        "admin": {
            "fields": {
                "in_doaj": {"coerce": "bool"},
                "ticked": {"coerce": "bool"},
                "current_application": {"coerce": "unicode"},
            },
            "lists": {
                "related_applications": {"contains": "object"},
            },
            "structs": {
                "related_applications": {
                    "fields": {
                        "application_id": {"coerce": "unicode"},
                        "date_accepted": {"coerce": "utcdatetime"},
                        "status": {"coerce": "unicode"},
                    },
                },
                # NOTE(review): unlike the sibling structs, "contact" lists its
                # entries directly rather than under a "fields" key - confirm
                # this is what the struct merge expects.
                "contact": {
                    "name": {"coerce": "unicode"},
                    "email": {"coerce": "unicode"},
                },
            },
        },
        "index": {
            "fields": {
                "publisher_ac": {"coerce": "unicode"},
                "institution_ac": {"coerce": "unicode"},
            },
        },
    },
}

class ContinuationException(Exception):
    """Raised by Journal.make_continuation when its arguments are not acceptable."""

class JournalLikeObject(SeamlessMixin, DomainObject):
    """
    Base class for journal-like records.

    Provides the shared finders, accessors for the admin and bibjson data held
    in the seamless object, note handling, and generation of the "index"
    section of the record.
    """

    @classmethod
    def find_by_issn(cls, issns, in_doaj=None, max=10):
        """Return up to ``max`` records matching ``issns`` (one ISSN or a list), optionally filtered by ``in_doaj``."""
        if not isinstance(issns, list):
            issns = [issns]
        q = JournalQuery()
        q.find_by_issn(issns, in_doaj=in_doaj, max=max)
        result = cls.query(q=q.query)
        # create an array of objects, using cls rather than Journal, which means subclasses can use it too
        records = [cls(**r.get("_source")) for r in result.get("hits", {}).get("hits", [])]
        return records

    @classmethod
    def issns_by_owner(cls, owner, in_doaj=None):
        """Return the keys of the "issns" aggregation for records owned by ``owner``."""
        q = IssnQuery(owner, in_doaj=in_doaj)
        res = cls.query(q=q.query())
        issns = [term.get("key") for term in res.get("aggregations", {}).get("issns", {}).get("buckets", [])]
        return issns

    @classmethod
    def get_by_owner(cls, owner):
        """Return the records owned by ``owner`` as instances of ``cls``."""
        q = OwnerQuery(owner)
        res = cls.query(q=q.query())
        # get_by_owner() in application.py predates this, but I've made it an override because it does application stuff
        records = [cls(**r.get("_source")) for r in res.get("hits", {}).get("hits", [])]
        return records

    @classmethod
    def issns_by_query(cls, query):
        """Return the concatenated known ISSNs of every record matched by ``query`` (duplicates are kept)."""
        issns = []
        for j in cls.iterate(query):
            issns += j.known_issns()
        return issns

    @classmethod
    def find_by_journal_url(cls, url, in_doaj=None, max=10):
        """Return up to ``max`` records whose journal URL matches ``url``, optionally filtered by ``in_doaj``."""
        q = JournalURLQuery(url, in_doaj, max)
        result = cls.query(q=q.query())
        # create an array of objects, using cls rather than Journal, which means subclasses can use it too
        records = [cls(**r.get("_source")) for r in result.get("hits", {}).get("hits", [])]
        return records

    @classmethod
    def recent(cls, max=10):
        """Return the ``max`` most recently created records."""
        q = RecentJournalsQuery(max)
        result = cls.query(q=q.query())
        # create an array of objects, using cls rather than Journal, which means subclasses can use it too
        records = [cls(**r.get("_source")) for r in result.get("hits", {}).get("hits", [])]
        return records

    ############################################
    ## base property methods

    @property
    def data(self):
        """The raw data held by the seamless object."""
        return self.__seamless__.data

    @property
    def has_apc(self):
        """The value stored at ``bibjson.apc.has_apc``."""
        return self.__seamless__.get_single("bibjson.apc.has_apc")

    @property
    def id(self):
        """The record id."""
        return self.__seamless__.get_single("id")

    def set_id(self, id=None):
        """Set the record id, generating one with ``makeid()`` when none is given."""
        if id is None:
            id = self.makeid()
        self.__seamless__.set_with_struct("id", id)

    def set_created(self, date=None):
        """Set ``created_date``; defaults to ``dates.now()``."""
        if date is None:
            date = dates.now()
        self.__seamless__.set_with_struct("created_date", date)

    @property
    def created_date(self):
        """The stored ``created_date`` value."""
        return self.__seamless__.get_single("created_date")

    @property
    def created_timestamp(self):
        """``created_date`` passed through ``coerce.to_datestamp()``."""
        return self.__seamless__.get_single("created_date", coerce=coerce.to_datestamp())

    def set_last_updated(self, date=None):
        """Set ``last_updated``; defaults to ``dates.now()``."""
        if date is None:
            date = dates.now()
        self.__seamless__.set_with_struct("last_updated", date)

    @property
    def last_updated(self):
        """The stored ``last_updated`` value."""
        return self.__seamless__.get_single("last_updated")

    @property
    def last_updated_timestamp(self):
        """``last_updated`` passed through ``coerce.to_datestamp()``."""
        return self.__seamless__.get_single("last_updated", coerce=coerce.to_datestamp())

    def last_updated_since(self, days=0):
        """Return True if the record was last updated within the past ``days`` days (UTC)."""
        return self.last_updated_timestamp > (datetime.utcnow() - timedelta(days=days))

    def set_last_manual_update(self, date=None):
        """Set ``last_manual_update``; defaults to ``dates.now()``."""
        if date is None:
            date = dates.now()
        self.__seamless__.set_with_struct("last_manual_update", date)

    @property
    def last_manual_update(self):
        """The stored ``last_manual_update`` value."""
        return self.__seamless__.get_single("last_manual_update")

    @property
    def last_manual_update_timestamp(self):
        """``last_manual_update`` passed through ``coerce.to_datestamp()``."""
        return self.__seamless__.get_single("last_manual_update", coerce=coerce.to_datestamp())

    def has_been_manually_updated(self):
        """Return True if a manual update timestamp is set and is later than the Unix epoch."""
        lmut = self.last_manual_update_timestamp
        if lmut is None:
            return False
        return lmut > datetime.utcfromtimestamp(0)

    def has_seal(self):
        """Return ``admin.seal``, or False when it is not set."""
        return self.__seamless__.get_single("admin.seal", default=False)

    def set_seal(self, value):
        """Set ``admin.seal``."""
        self.__seamless__.set_with_struct("admin.seal", value)

    def has_oa_start_date(self):
        """Return ``bibjson.oa_start``, or False when it is not set."""
        return self.__seamless__.get_single("bibjson.oa_start", default=False)

    @property
    def owner(self):
        """The value of ``admin.owner``."""
        return self.__seamless__.get_single("admin.owner")

    def set_owner(self, owner):
        """Set ``admin.owner``."""
        self.__seamless__.set_with_struct("admin.owner", owner)

    def remove_owner(self):
        """Delete ``admin.owner``."""
        self.__seamless__.delete("admin.owner")

    @property
    def owner_account(self):
        """The Account pulled for ``owner``, or None when no owner is set."""
        if self.owner:
            return Account.pull(self.owner)
        return None

    @property
    def editor_group(self):
        """The value of ``admin.editor_group``."""
        return self.__seamless__.get_single("admin.editor_group")

    def set_editor_group(self, eg):
        """Set ``admin.editor_group``."""
        self.__seamless__.set_with_struct("admin.editor_group", eg)

    def remove_editor_group(self):
        """Delete ``admin.editor_group``."""
        self.__seamless__.delete("admin.editor_group")

    @property
    def editor(self):
        """The value of ``admin.editor``."""
        return self.__seamless__.get_single("admin.editor")

    def set_editor(self, ed):
        """Set ``admin.editor``."""
        self.__seamless__.set_with_struct("admin.editor", ed)

    def remove_editor(self):
        """Delete ``admin.editor``."""
        self.__seamless__.delete('admin.editor')

    @property
    def contact(self):
        """The value of ``admin.contact``."""
        return self.__seamless__.get_single("admin.contact")

    @property
    def contact_name(self):
        """The value of ``admin.contact.name``."""
        return self.__seamless__.get_single("admin.contact.name")

    @contact_name.setter
    def contact_name(self, name):
        self.__seamless__.set_with_struct("admin.contact.name", name)

    @property
    def contact_email(self):
        """The value of ``admin.contact.email``."""
        return self.__seamless__.get_single("admin.contact.email")

    @contact_email.setter
    def contact_email(self, email):
        self.__seamless__.set_with_struct("admin.contact.email", email)

    def set_contact(self, name, email):
        """Set both the contact name and the contact email."""
        self.contact_name = name
        self.contact_email = email

    def remove_contact(self):
        """Delete ``admin.contact``."""
        self.__seamless__.delete("admin.contact")

    def add_note(self, note, date=None, id=None):
        """
        Add a note to ``admin.notes``.

        Any existing entry matching the same date/note/id is removed first.
        ``date`` defaults to ``dates.now()``; a uuid4 is generated when no
        ``id`` is supplied.
        """
        if not date:
            date = dates.now()
        obj = {"date": date, "note": note, "id": id}
        self.__seamless__.delete_from_list("admin.notes", matchsub=obj)
        if not id:
            obj["id"] = uuid.uuid4()
        self.__seamless__.add_to_list_with_struct("admin.notes", obj)

    def remove_note(self, note):
        """Remove entries matching ``note`` from ``admin.notes``."""
        self.__seamless__.delete_from_list("admin.notes", matchsub=note)

    def set_notes(self, notes):
        """Replace ``admin.notes`` with ``notes``."""
        self.__seamless__.set_with_struct("admin.notes", notes)

    def remove_notes(self):
        """Delete ``admin.notes``."""
        self.__seamless__.delete("admin.notes")

    @property
    def notes(self):
        """The list stored at ``admin.notes``."""
        return self.__seamless__.get_list("admin.notes")

    @property
    def ordered_notes(self):
        """Orders notes by newest first"""
        notes = self.notes
        clusters = {}
        for note in notes:
            if "date" not in note:
                note["date"] = "1970-01-01T00:00:00Z"   # this really means something is broken with note date setting, which needs to be fixed
            clusters.setdefault(note["date"], []).append(note)

        ordered = []
        for key in sorted(clusters, reverse=True):
            # notes sharing a date are emitted in reverse of their stored order
            ordered.extend(reversed(clusters[key]))
        return ordered

    def bibjson(self):
        """Return a JournalLikeBibJSON over the record's bibjson, creating an empty one if absent."""
        bj = self.__seamless__.get_single("bibjson")
        if bj is None:
            self.__seamless__.set_single("bibjson", {})
            bj = self.__seamless__.get_single("bibjson")
        return JournalLikeBibJSON(bj)

    def set_bibjson(self, bibjson):
        """Set the bibjson from either a JournalLikeBibJSON or a raw data structure."""
        bibjson = bibjson.data if isinstance(bibjson, JournalLikeBibJSON) else bibjson
        self.__seamless__.set_with_struct("bibjson", bibjson)

    ######################################################
    ## DEPRECATED METHODS

    def known_issns(self):
        """
        DEPRECATED

        all issns this journal is known by

        This used to mean "all issns the journal has ever been known by", but that definition has changed since
        continuations have been separated from the single journal object model.

        Now this is just a proxy for self.bibjson().issns()
        """
        return self.bibjson().issns()

    def get_latest_contact_name(self):
        """Proxy for ``contact_name``."""
        return self.contact_name

    def get_latest_contact_email(self):
        """Proxy for ``contact_email``."""
        return self.contact_email

    def add_contact(self, name, email):
        """Proxy for ``set_contact``."""
        self.set_contact(name, email)

    def remove_contacts(self):
        """Proxy for ``remove_contact``."""
        self.remove_contact()

    ######################################################
    ## internal utility methods

    def _generate_index(self):
        """Rebuild the "index" section of the record from the current bibjson and admin data."""
        # the index fields we are going to accumulate
        titles = []
        subjects = []
        schema_subjects = []
        schema_codes = []
        classification = []
        license = []
        unpunctitle = None
        asciiunpunctitle = None
        continued = "No"
        has_editor_group = "No"
        has_editor = "No"

        # the place we're going to get those fields from
        cbib = self.bibjson()

        # get the title out of the current bibjson
        if cbib.title is not None:
            titles.append(cbib.title)
        if cbib.alternative_title:
            titles.append(cbib.alternative_title)

        # get the subjects and concatenate them with their schemes from the current bibjson
        for subs in cbib.subject:
            scheme = subs.get("scheme")
            term = subs.get("term")
            subjects.append(term)
            schema_subjects.append(scheme + ":" + term)
            classification.append(term)
            if "code" in subs:
                schema_codes.append(scheme + ":" + subs.get("code"))

        # now expand the classification to hold all its parent terms too
        additional = []
        for c in classification:
            tp = cbib.term_path(c)
            if tp is not None:
                additional += tp
        classification += additional

        # add the keywords to the non-schema subjects (but not the classification)
        subjects += cbib.keywords

        # get the bibjson object to convert the languages to the english form
        langs = cbib.language_name()

        # get the english name of the country
        country = cbib.country_name()

        # get the type of the licenses
        for l in cbib.licences:
            license.append(l.get("type"))

        # deduplicate the lists
        titles = list(set(titles))
        subjects = list(set(subjects))
        schema_subjects = list(set(schema_subjects))
        classification = list(set(classification))
        license = list(set(license))
        schema_codes = list(set(schema_codes))

        # determine if the seal is applied
        has_seal = "Yes" if self.has_seal() else "No"

        # get the full classification paths for the subjects
        classification_paths = cbib.lcc_paths()
        schema_codes_tree = cbib.lcc_codes_full_list()

        # create an unpunctitle
        if cbib.title is not None:
            throwlist = string.punctuation + '\n\t'
            unpunctitle = "".join(c for c in cbib.title if c not in throwlist).strip()
            try:
                asciiunpunctitle = unidecode(unpunctitle)
            except Exception:
                # best effort: fall back to the un-transliterated title, but do not
                # swallow KeyboardInterrupt/SystemExit as a bare except would
                asciiunpunctitle = unpunctitle

        # record if this journal object is a continuation
        if len(cbib.replaces) > 0 or len(cbib.is_replaced_by) > 0:
            continued = "Yes"

        if self.editor_group is not None:
            has_editor_group = "Yes"

        if self.editor is not None:
            has_editor = "Yes"

        # build the index part of the object
        index = {}

        if country is not None:
            index["country"] = country
        # has_seal is always "Yes" or "No", so it is always recorded
        index["has_seal"] = has_seal
        if unpunctitle is not None:
            index["unpunctitle"] = unpunctitle
        if asciiunpunctitle is not None:
            index["asciiunpunctitle"] = asciiunpunctitle
        index["continued"] = continued
        index["has_editor_group"] = has_editor_group
        index["has_editor"] = has_editor

        index["issn"] = cbib.issns()
        if len(titles) > 0:
            index["title"] = titles
        if len(subjects) > 0:
            index["subject"] = subjects
        if len(schema_subjects) > 0:
            index["schema_subject"] = schema_subjects
        if len(classification) > 0:
            index["classification"] = classification
        if len(langs) > 0:
            index["language"] = langs
        if len(license) > 0:
            index["license"] = license
        if len(classification_paths) > 0:
            index["classification_paths"] = classification_paths
        if len(schema_codes) > 0:
            index["schema_code"] = schema_codes
        if len(schema_codes_tree) > 0:
            index["schema_codes_tree"] = schema_codes_tree

        self.__seamless__.set_with_struct("index", index)

464

465

class Journal(JournalLikeObject):
    """
    The journal record.

    Adds journal-specific finders, snapshotting to JournalHistory, creation of
    continuations, the in_doaj/ticked/related-application admin data and the
    save-time preparation of the record.
    """
    __type__ = "journal"

    __SEAMLESS_STRUCT__ = [
        shared_structs.JOURNAL_BIBJSON,
        shared_structs.SHARED_JOURNAL_LIKE,
        JOURNAL_STRUCT
    ]

    __SEAMLESS_COERCE__ = COERCE_MAP

    def __init__(self, **kwargs):
        """Build the journal from raw record data, unwrapping an ES hit if one is passed."""
        # FIXME: hack, to deal with ES integration layer being improperly abstracted
        if "_source" in kwargs:
            kwargs = kwargs["_source"]
        # FIXME: I have taken this out for the moment, as I'm not sure it's what we should be doing
        #if kwargs:
        #    self.add_autogenerated_fields(**kwargs)
        super(Journal, self).__init__(raw=kwargs)

    @classmethod
    def add_autogenerated_fields(cls, **kwargs):
        """
        Fill in the derived ``has_*`` style flags on ``kwargs["bibjson"]`` in place.

        NOTE(review): the only call to this in __init__ is commented out.
        """
        bib = kwargs["bibjson"]
        if "apc" in bib and bib["apc"] != '':
            bib["apc"]["has_apc"] = len(bib["apc"]["max"]) != 0
        else:
            bib["apc"] = {"has_apc": False}
        if "deposit_policy" in bib and bib["deposit_policy"] != []:
            bib["deposit_policy"]["has_policy"] = True
        else:
            ##change made in https://github.com/DOAJ/doaj/commit/e507123f423fe16fd270744055da0129e2b32005
            bib["deposit_policy"] = {"has_policy": False}
        if "other_charges" in bib and bib["other_charges"] != '':
            bib["other_charges"]["has_other_charges"] = bib["other_charges"]["url"] is not None
        else:
            bib["other_charges"] = {"has_other_charges": False}
        if "copyright" in bib and bib["copyright"]["url"] != '':
            bib["copyright"]["author_retains"] = bib["copyright"]["url"] is not None
        else:
            bib["copyright"] = {"author_retains": False}
        if "pid_scheme" in bib and bib["pid_scheme"] != '':
            bib["pid_scheme"]["has_pid_scheme"] = len(bib["pid_scheme"]["scheme"]) != 0
        else:
            bib["pid_scheme"] = {"has_pid_scheme": False}
        if "preservation" in bib and bib["preservation"] != '':
            bib["preservation"]["has_preservation"] = (len(bib["preservation"]) != 0 or
                                                       bib["national_library"] is not None)
        else:
            # NOTE(review): every other fallback above sets its flag to False - confirm True is intended here
            bib["preservation"] = {"has_preservation": True}

    #####################################################
    ## Journal-specific data access methods

    @classmethod
    def all_in_doaj(cls, page_size=5000):
        """Iterate over the records matched by the "all in DOAJ" query."""
        q = JournalQuery()
        return cls.iterate(q.all_in_doaj(), page_size=page_size, wrap=True)

    @classmethod
    def find_by_publisher(cls, publisher, exact=True):
        """Return the journals whose publisher name matches ``publisher``."""
        q = PublisherQuery(publisher, exact)
        result = cls.query(q=q.query())
        records = [Journal(**r.get("_source")) for r in result.get("hits", {}).get("hits", [])]
        return records

    @classmethod
    def find_by_title(cls, title):
        """Return the journals whose indexed title matches ``title`` exactly."""
        q = TitleQuery(title)
        result = cls.query(q=q.query())
        records = [Journal(**r.get("_source")) for r in result.get("hits", {}).get("hits", [])]
        return records

    @classmethod
    def delete_selected(cls, query, articles=False, snapshot_journals=True, snapshot_articles=True):
        """
        Delete the journals matched by ``query``.

        When ``articles`` is true the articles carrying those journals' ISSNs
        are deleted first; the snapshot flags control whether journals and
        articles are snapshotted before deletion.
        """
        if articles:
            # list the issns of all the journals
            issns = cls.issns_by_query(query)

            # issue a delete request over all the articles by those issns
            from portality.models import Article
            Article.delete_by_issns(issns, snapshot=snapshot_articles)

        # snapshot the journal record
        if snapshot_journals:
            js = cls.iterate(query, page_size=1000)
            for j in js:
                j.snapshot()

        # finally issue a delete request against the journals
        cls.delete_by_query(query)

    def all_articles(self):
        """Return the articles found by this journal's known ISSNs."""
        from portality.models import Article
        return Article.find_by_issns(self.known_issns())

    def article_stats(self):
        """Return ``{"total": ..., "latest": ...}``: the article count and the newest article's created_date (or None)."""
        from portality.models import Article
        q = ArticleStatsQuery(self.known_issns())
        data = Article.query(q=q.query())
        hits = data.get("hits", {})
        total = hits.get("total", {}).get('value', 0)
        latest = None
        if total > 0:
            latest = hits.get("hits", [])[0].get("_source").get("created_date")
        return {
            "total": total,
            "latest": latest
        }

    def mappings(self):
        """Return the mapping created from this object's struct and MAPPING_OPTS."""
        return es_data_mapping.create_mapping(self.__seamless_struct__.raw, MAPPING_OPTS)

    ############################################
    ## base property methods

    @property
    def toc_id(self):
        """The preferred ISSN if there is one, otherwise the record id."""
        id_ = self.bibjson().get_preferred_issn()
        if not id_:
            id_ = self.id
        return id_

    @property
    def last_update_request(self):
        """
        The most recent ``date_accepted`` among the related applications, or None when there are none.

        Records without a ``date_accepted`` are treated as dated 1970-01-01T00:00:00Z.
        """
        related = self.related_applications
        if len(related) == 0:
            return None
        # sorted() returns a new list, so sorting and then reading related[0]
        # never reordered anything; pick the latest record explicitly
        latest = max(related, key=lambda x: x.get("date_accepted", "1970-01-01T00:00:00Z"))
        return latest.get("date_accepted", "1970-01-01T00:00:00Z")

    ############################################################
    ## revision history methods

    def snapshot(self):
        """Save a copy of the current data as a JournalHistory record, keyed back to this journal via "about"."""
        from portality.models import JournalHistory

        snap = deepcopy(self.data)
        if "id" in snap:
            snap["about"] = snap["id"]
            del snap["id"]
        # drop the fields that belong to the live record rather than the snapshot
        for field in ("index", "last_updated", "created_date"):
            if field in snap:
                del snap[field]

        hist = JournalHistory(**snap)
        hist.save()

    #######################################################################
    ## Conversion methods

    def make_continuation(self, type, eissn=None, pissn=None, title=None):
        """
        Create, save and return a new Journal that is a continuation of this one.

        :param type: "replaces" or "is_replaced_by", expressed from this journal's point of view
        :param eissn: electronic ISSN for the continuation
        :param pissn: print ISSN for the continuation
        :param title: optional new title for the continuation
        :raises ContinuationException: if ``type`` is not recognised, or both ISSNs are None
        """
        # check that the type is one we know.  Must be either 'replaces' or 'is_replaced_by'
        if type not in ["replaces", "is_replaced_by"]:
            raise ContinuationException("type must be one of 'replaces' or 'is_replaced_by'")

        if eissn is None and pissn is None:
            raise ContinuationException("You must create a continuation with at least one issn")

        # take a copy of the raw data for this journal, and the issns for this journal
        raw_cont = deepcopy(self.data)
        bibjson = self.bibjson()
        issns = bibjson.issns()
        cissns = []

        # make a new instance of the journal - this will be our continuation
        del raw_cont["id"]
        del raw_cont["created_date"]
        del raw_cont["last_updated"]
        j = Journal(**raw_cont)

        # ensure that the journal is NOT in doaj.  That will be for the admin to decide
        j.set_in_doaj(False)

        # get a copy of the continuation's bibjson, then remove the existing issns
        cbj = j.bibjson()
        del cbj.eissn
        del cbj.pissn

        # also remove any existing continuation information
        del cbj.replaces
        del cbj.is_replaced_by
        del cbj.discontinued_date

        # now write the new identifiers
        if eissn is not None and eissn != "":
            cissns.append(eissn)
            cbj.eissn = eissn
        if pissn is not None and pissn != "":
            cissns.append(pissn)
            cbj.pissn = pissn

        # update the title
        if title is not None:
            cbj.title = title

        # now add the issns of the original journal in the appropriate field
        #
        # This is a bit confusing - because we're asking this of a Journal object, the relationship type we're asking
        # for relates to this journal, not to the continuation we are creating.  This means that when setting the
        # new continuations properties, we have to do the opposite to what we do to the journal's properties
        #
        # "replaces" means that the current journal replaces the new continuation
        if type == "replaces":
            bibjson.replaces = cissns
            cbj.is_replaced_by = issns

        # "is_replaced_by" means that the current journal is replaced by the new continuation
        elif type == "is_replaced_by":
            bibjson.is_replaced_by = cissns
            cbj.replaces = issns

        # save this journal
        self.save()

        # save the continuation, and return a copy to the caller
        j.save()
        return j

    ####################################################
    ## admin data methods

    def is_in_doaj(self):
        """Return ``admin.in_doaj``, or False when it is not set."""
        return self.__seamless__.get_single("admin.in_doaj", default=False)

    def set_in_doaj(self, value):
        """Set ``admin.in_doaj``."""
        self.__seamless__.set_with_struct("admin.in_doaj", value)

    def is_ticked(self):
        """Return ``admin.ticked``, or False when it is not set."""
        return self.__seamless__.get_single("admin.ticked", default=False)

    def set_ticked(self, ticked):
        """Set ``admin.ticked``."""
        self.__seamless__.set_with_struct("admin.ticked", ticked)

    @property
    def current_application(self):
        """The value of ``admin.current_application``."""
        return self.__seamless__.get_single("admin.current_application")

    def set_current_application(self, application_id):
        """Set ``admin.current_application``."""
        self.__seamless__.set_with_struct("admin.current_application", application_id)

    def remove_current_application(self):
        """Delete ``admin.current_application``."""
        self.__seamless__.delete("admin.current_application")

    @property
    def related_applications(self):
        """The list stored at ``admin.related_applications``."""
        return self.__seamless__.get_list("admin.related_applications")

    def add_related_application(self, application_id, date_accepted=None, status=None):
        """Add a related application record, replacing any existing record for the same ``application_id``."""
        obj = {"application_id": application_id}
        self.__seamless__.delete_from_list("admin.related_applications", matchsub=obj)
        if date_accepted is not None:
            obj["date_accepted"] = date_accepted
        if status is not None:
            obj["status"] = status
        self.__seamless__.add_to_list_with_struct("admin.related_applications", obj)

    def set_related_applications(self, related_applications_records):
        """Replace ``admin.related_applications`` with the supplied records."""
        self.__seamless__.set_with_struct("admin.related_applications", related_applications_records)

    def remove_related_applications(self):
        """Delete ``admin.related_applications``."""
        self.__seamless__.delete("admin.related_applications")

    def remove_related_application(self, application_id):
        """Remove the related application record(s) with the given ``application_id``."""
        self.set_related_applications([r for r in self.related_applications if r.get("application_id") != application_id])

    def related_application_record(self, application_id):
        """Return the first related application record with the given id, or None."""
        for record in self.related_applications:
            if record.get("application_id") == application_id:
                return record
        return None

    def latest_related_application_id(self):
        """
        Return the ``application_id`` of the most recently accepted related application, or None when there are none.

        Records without a ``date_accepted`` are treated as dated 1970-01-01T00:00:00Z.
        """
        related = self.related_applications
        if len(related) == 0:
            return None
        # sorted() returns a new list, so sorting and then reading related[0]
        # never reordered anything; pick the latest record explicitly
        latest = max(related, key=lambda x: x.get("date_accepted", "1970-01-01T00:00:00Z"))
        return latest.get("application_id")

    ########################################################################
    ## Functions for handling continuations

    def get_future_continuations(self):
        """Return the journals that replace this one, followed by those journals' own future continuations (recursively)."""
        irb = self.bibjson().is_replaced_by
        q = ContinuationQuery(irb)

        journals = self.q2obj(q=q.query())
        subjournals = []
        for j in journals:
            subjournals += j.get_future_continuations()

        return journals + subjournals

    def get_past_continuations(self):
        """Return the journals this one replaces, followed by those journals' own past continuations (recursively)."""
        replaces = self.bibjson().replaces
        q = ContinuationQuery(replaces)

        journals = self.q2obj(q=q.query())
        subjournals = []
        for j in journals:
            subjournals += j.get_past_continuations()

        return journals + subjournals

    #######################################################################

    #####################################################
    ## operations we can do to the journal

    def calculate_tick(self):
        """
        Set ``admin.ticked`` from the creation date, last update request and the TICK_THRESHOLD config value.

        An unsaved record is ticked if the threshold is in the past. A saved
        record is ticked if it is in DOAJ and was either created, or last had
        an update request accepted, on or after the threshold.
        """
        created_date = self.created_date
        last_update_request = self.last_update_request

        tick_threshold = app.config.get("TICK_THRESHOLD", '2014-03-19T00:00:00Z')
        threshold = datetime.strptime(tick_threshold, "%Y-%m-%dT%H:%M:%SZ")

        if created_date is None:    # don't worry about the last_update_request date - you can't update unless you've been created!
            # we haven't even saved the record yet.  All we need to do is check that the tick
            # threshold is in the past (which I suppose theoretically it could not be), then
            # set it
            if datetime.utcnow() >= threshold:
                self.set_ticked(True)
            else:
                self.set_ticked(False)
            return

        # otherwise, this is an existing record, and we just need to update it

        # convert the strings to datetime objects
        created = datetime.strptime(created_date, "%Y-%m-%dT%H:%M:%SZ")
        lud = None
        if last_update_request is not None:
            lud = datetime.strptime(last_update_request, "%Y-%m-%dT%H:%M:%SZ")

        if created >= threshold and self.is_in_doaj():
            self.set_ticked(True)
            return

        if lud is not None and lud >= threshold and self.is_in_doaj():
            self.set_ticked(True)
            return

        self.set_ticked(False)

    def propagate_in_doaj_status_to_articles(self):
        """Copy this journal's in_doaj status onto each of its articles and save them."""
        for article in self.all_articles():
            article.set_in_doaj(self.is_in_doaj())
            article.save()

    def prep(self, is_update=True):
        """Bring the derived parts of the record up to date; also stamps ``last_updated`` when ``is_update`` is true."""
        self._ensure_in_doaj()
        self.calculate_tick()
        self._generate_index()
        self._calculate_has_apc()
        self._generate_autocompletes()
        if is_update:
            self.set_last_updated()

    def save(self, snapshot=True, sync_owner=True, **kwargs):
        """
        Prepare, verify and save the record.

        :param snapshot: take a JournalHistory snapshot after saving
        :param sync_owner: copy the owner to the current application before saving
        :return: the result of the parent class's ``save``
        """
        self.prep()
        self.verify_against_struct()
        if sync_owner:
            self._sync_owner_to_application()
        res = super(Journal, self).save(**kwargs)
        if snapshot:
            self.snapshot()
        return res

    ######################################################
    ## internal utility methods

    def _generate_autocompletes(self):
        """Write lower-cased publisher and institution values into the autocomplete index fields."""
        bj = self.bibjson()
        publisher = bj.publisher
        institution = bj.institution

        if publisher is not None:
            self.__seamless__.set_with_struct("index.publisher_ac", publisher.lower())

        if institution is not None:
            self.__seamless__.set_with_struct("index.institution_ac", institution.lower())

    def _ensure_in_doaj(self):
        """Default ``admin.in_doaj`` to False when it has not been set."""
        if self.__seamless__.get_single("admin.in_doaj", default=None) is None:
            self.set_in_doaj(False)

    def _sync_owner_to_application(self):
        """Copy this journal's owner onto its current application, if there is one and the owners differ."""
        if self.current_application is None:
            return
        from portality.models.v2.application import Application
        ca = Application.pull(self.current_application)
        if ca is not None and ca.owner != self.owner:
            ca.set_owner(self.owner)
            # sync_owner=False is passed so the application does not sync back in turn
            ca.save(sync_owner=False)

    def _calculate_has_apc(self):
        """Set ``index.has_apc`` to "Yes", "No" or "No Information"."""
        # work out of the journal has an apc
        has_apc = "No Information"
        apc_present = self.bibjson().has_apc
        if apc_present:
            has_apc = "Yes"
        elif self.is_ticked():        # Because if an item is not ticked we want to say "No Information"
            has_apc = "No"

        self.__seamless__.set_with_struct("index.has_apc", has_apc)

880

881

# Options passed to es_data_mapping.create_mapping by Journal.mappings()
MAPPING_OPTS = {
    "dynamic": None,
    "coerces": app.config["DATAOBJ_TO_MAPPING_DEFAULTS"],
    "exceptions": {
        # "include_in_all": False was removed in es6
        # fixme: do we need to look at copy_to for the mapping?
        "admin.notes.note": {"type": "text", "index": False},
    },
}

893

894

895########################################################

896## Data Access Queries

897

class JournalQuery(object):
    """
    Builder for the kinds of queries we run against the journal type
    """
    # template for ISSN lookups; the "<issn>" placeholder is replaced with the real list
    issn_query = {
        "track_total_hits": True,
        "query": {"bool": {"must": [{"terms": {"index.issn.exact": "<issn>"}}]}},
    }

    # template matching every record flagged as in DOAJ
    all_doaj = {
        "track_total_hits": True,
        "query": {"bool": {"must": [{"term": {"admin.in_doaj": True}}]}},
    }

    _minified_fields = ["id", "bibjson.title", "last_updated"]

    def __init__(self, minified=False, sort_by_title=False):
        """Record the output options; ``query`` stays None until find_by_issn is called."""
        self.query = None
        self.minified = minified
        self.sort_by_title = sort_by_title

    def find_by_issn(self, issns, in_doaj=None, max=10):
        """Store in ``self.query`` an ISSN lookup for ``issns``, limited to ``max`` results."""
        built = deepcopy(self.issn_query)
        clauses = built["query"]["bool"]["must"]
        clauses[0]["terms"]["index.issn.exact"] = issns
        if in_doaj is not None:
            clauses.append({"term": {"admin.in_doaj": in_doaj}})
        built["size"] = max
        self.query = built

    def all_in_doaj(self):
        """Return the "all in DOAJ" query, with field restriction and title sort applied as configured."""
        body = deepcopy(self.all_doaj)
        extras = (
            (self.minified, "fields", self._minified_fields),
            (self.sort_by_title, "sort", [{"bibjson.title.exact": {"order": "asc"}}]),
        )
        for enabled, key, value in extras:
            if enabled:
                body[key] = value
        return body

947

948

class JournalURLQuery(object):
    """Query matching records by journal URL, optionally filtered by in_doaj status."""

    def __init__(self, url, in_doaj=None, max=10):
        self.url = url
        self.in_doaj = in_doaj
        self.max = max

    def query(self):
        """Return the query body; the in_doaj clause is added only when in_doaj is not None."""
        clauses = [{"match": {"bibjson.ref.journal.exact": self.url}}]
        if self.in_doaj is not None:
            clauses.append({"term": {"admin.in_doaj": self.in_doaj}})
        return {
            "track_total_hits": True,
            "query": {"bool": {"must": clauses}},
            "size": self.max,
        }

972

973

class IssnQuery(object):
    """Aggregation query listing the ISSNs on records owned by a given account."""

    def __init__(self, owner, in_doaj=None):
        self._owner = owner
        self._in_doaj = in_doaj

    def query(self):
        """Return a zero-hit query with an "issns" terms aggregation ordered by key."""
        body = {
            "track_total_hits": True,
            "query": {
                "bool": {
                    "must": [{"term": {"admin.owner.exact": self._owner}}]
                }
            },
            "size": 0,
            "aggs": {
                "issns": {
                    "terms": {
                        "field": "index.issn.exact",
                        "size": 10000,
                        "order": {"_key": "asc"}
                    }
                }
            }
        }
        if self._in_doaj is not None:
            body["query"]["bool"]["must"].append({"term": {"admin.in_doaj": self._in_doaj}})
        return body

1001

1002

class OwnerQuery(object):
    """ Query to supply all full journal sources by owner """
    base_query = {
        "track_total_hits": True,
        "query": {"term": {"admin.owner.exact": "<owner id here>"}},
        "size": 10000,
    }

    def __init__(self, owner):
        # work on a copy so the class-level template keeps its placeholder
        prepared = deepcopy(self.base_query)
        prepared["query"]["term"]["admin.owner.exact"] = owner
        self._query = prepared

    def query(self):
        """Return the query built for this owner."""
        return self._query

1019

1020

class PublisherQuery(object):
    """Term query on the publisher name, against either the exact or the plain field."""

    exact_query = {
        "track_total_hits": True,
        "query": {"term": {"bibjson.publisher.name.exact": "<publisher name here>"}},
        "size": 10000
    }

    inexact_query = {
        "track_total_hits": True,
        "query": {"term": {"bibjson.publisher.name": "<publisher name here>"}},
        "size": 10000
    }

    def __init__(self, publisher, exact=True):
        self.publisher = publisher
        self.exact = exact

    def query(self):
        """Return the query body; the inexact form lower-cases the publisher name."""
        if self.exact:
            template = self.exact_query
            field = "bibjson.publisher.name.exact"
            value = self.publisher
        else:
            template = self.inexact_query
            field = "bibjson.publisher.name"
            value = self.publisher.lower()
        body = deepcopy(template)
        body["query"]["term"][field] = value
        return body

1051

1052

class TitleQuery(object):
    """Term query on the exact indexed title."""

    base_query = {
        "track_total_hits": True,
        "query": {"term": {"index.title.exact": "<title here>"}},
        "size": 10000
    }

    def __init__(self, title):
        self.title = title

    def query(self):
        """Return a fresh copy of the template with the title filled in."""
        body = deepcopy(self.base_query)
        body["query"]["term"].update({"index.title.exact": self.title})
        return body

1069

1070

class ContinuationQuery(object):
    """Query for the records carrying any of a list of ISSNs."""

    def __init__(self, issns):
        self.issns = issns

    def query(self):
        """Return the terms query over ``index.issn.exact``."""
        issn_clause = {"terms": {"index.issn.exact": self.issns}}
        return {
            "track_total_hits": True,
            "query": {"bool": {"must": [issn_clause]}},
            "size": 10000
        }

1087

1088

class ArticleStatsQuery(object):
    """Query for the newest in-DOAJ article among a set of ISSNs; the total comes from the hit count."""

    def __init__(self, issns):
        self.issns = issns

    def query(self):
        """Return a one-hit query, newest created_date first, returning only created_date."""
        filters = [
            {"terms": {"index.issn.exact": self.issns}},
            {"term": {"admin.in_doaj": True}}
        ]
        newest_first = [{"created_date": {"order": "desc"}}]
        return {
            "track_total_hits": True,
            "query": {"bool": {"must": filters}},
            "size": 1,
            "_source": {"include": ["created_date"]},
            "sort": newest_first
        }

1110

1111

class RecentJournalsQuery(object):
    """Match-all query returning the most recently created records."""

    def __init__(self, max):
        self.max = max

    def query(self):
        """Return the query body, sized to ``max`` and sorted newest first."""
        newest_first = [{"created_date": {"order": "desc"}}]
        return {
            "track_total_hits": True,
            "query": {"match_all": {}},
            "size": self.max,
            "sort": newest_first
        }