Coverage for portality/models/v2/journal.py: 92%
659 statements
« prev ^ index » next coverage.py v6.4.2, created at 2022-07-19 18:38 +0100
« prev ^ index » next coverage.py v6.4.2, created at 2022-07-19 18:38 +0100
1from portality.dao import DomainObject
2from portality.core import app
3from portality.models.v2.bibjson import JournalLikeBibJSON
4from portality.models.v2 import shared_structs
5from portality.models.account import Account
6from portality.lib import es_data_mapping, dates, coerce
7from portality.lib.seamless import SeamlessMixin
8from portality.lib.coerce import COERCE_MAP
10from copy import deepcopy
11from datetime import datetime, timedelta
13import string, uuid
14from unidecode import unidecode
# Seamless struct fragment holding the journal-specific parts of the record.
# It is combined with the shared structs in Journal.__SEAMLESS_STRUCT__.
JOURNAL_STRUCT = {
    # top-level sub-objects described by this fragment
    "objects": ["admin", "index"],

    "structs": {
        # administrative metadata
        "admin": {
            "fields": {
                "in_doaj": {"coerce": "bool"},
                "ticked": {"coerce": "bool"},
                "current_application": {"coerce": "unicode"}
            },
            "lists": {
                "related_applications": {"contains": "object"}
            },
            "structs": {
                # shape of each entry in admin.related_applications
                "related_applications": {
                    "fields": {
                        "application_id": {"coerce": "unicode"},
                        "date_accepted": {"coerce": "utcdatetime"},
                        "status": {"coerce": "unicode"}
                    }
                },
                # NOTE(review): unlike related_applications above, these entries are not
                # wrapped in a "fields" key - confirm whether that is intentional
                "contact": {
                    "name": {"coerce": "unicode"},
                    "email": {"coerce": "unicode"}
                }
            }
        },
        # autocomplete values written by Journal._generate_autocompletes()
        "index": {
            "fields": {
                "publisher_ac": {"coerce": "unicode"},
                "institution_ac": {"coerce": "unicode"}
            }
        }
    }
}
class ContinuationException(Exception):
    """Raised by Journal.make_continuation() when the requested continuation is invalid."""
class JournalLikeObject(SeamlessMixin, DomainObject):
    """
    Base class for journal-like domain objects.

    Provides query helpers, accessors over the seamless-managed record (id, dates,
    admin.* fields, notes, bibjson) and generation of the "index" section.
    """

    @classmethod
    def find_by_issn(cls, issns, in_doaj=None, max=10):
        """
        Find records matching any of the given ISSNs.

        :param issns: a single ISSN or a list of ISSNs
        :param in_doaj: if not None, additionally filter on admin.in_doaj
        :param max: maximum number of records to return
        :return: list of cls instances
        """
        if not isinstance(issns, list):
            issns = [issns]
        q = JournalQuery()
        q.find_by_issn(issns, in_doaj=in_doaj, max=max)
        result = cls.query(q=q.query)
        # create an array of objects, using cls rather than Journal, which means subclasses can use it too
        records = [cls(**r.get("_source")) for r in result.get("hits", {}).get("hits", [])]
        return records

    @classmethod
    def issns_by_owner(cls, owner, in_doaj=None):
        """Return the ISSN keys from the "issns" aggregation for records owned by ``owner``."""
        q = IssnQuery(owner, in_doaj=in_doaj)
        res = cls.query(q=q.query())
        issns = [term.get("key") for term in res.get("aggregations", {}).get("issns", {}).get("buckets", [])]
        return issns

    @classmethod
    def get_by_owner(cls, owner):
        """Return the records owned by ``owner`` as a list of cls instances."""
        q = OwnerQuery(owner)
        res = cls.query(q=q.query())
        # get_by_owner() in application.py predates this, but I've made it an override because it does application stuff
        records = [cls(**r.get("_source")) for r in res.get("hits", {}).get("hits", [])]
        return records

    @classmethod
    def issns_by_query(cls, query):
        """Return the concatenated known_issns() of every record yielded by cls.iterate(query)."""
        issns = []
        for j in cls.iterate(query):
            issns += j.known_issns()
        return issns

    @classmethod
    def find_by_journal_url(cls, url, in_doaj=None, max=10):
        """Find records by journal URL (see JournalURLQuery); returns a list of cls instances."""
        q = JournalURLQuery(url, in_doaj, max)
        result = cls.query(q=q.query())
        # create an array of objects, using cls rather than Journal, which means subclasses can use it too
        records = [cls(**r.get("_source")) for r in result.get("hits", {}).get("hits", [])]
        return records

    @classmethod
    def recent(cls, max=10):
        """Return up to ``max`` records ordered by created_date, newest first."""
        q = RecentJournalsQuery(max)
        result = cls.query(q=q.query())
        # create an array of objects, using cls rather than Journal, which means subclasses can use it too
        records = [cls(**r.get("_source")) for r in result.get("hits", {}).get("hits", [])]
        return records

    ############################################
    ## base property methods

    @property
    def data(self):
        """The raw data held by the seamless object."""
        return self.__seamless__.data

    @property
    def has_apc(self):
        """Value of bibjson.apc.has_apc."""
        return self.__seamless__.get_single("bibjson.apc.has_apc")

    @property
    def id(self):
        return self.__seamless__.get_single("id")

    def set_id(self, id=None):
        """Set the record id; when none is supplied one is generated with self.makeid()."""
        if id is None:
            id = self.makeid()
        self.__seamless__.set_with_struct("id", id)

    def set_created(self, date=None):
        """Set created_date, defaulting to dates.now()."""
        if date is None:
            date = dates.now()
        self.__seamless__.set_with_struct("created_date", date)

    @property
    def created_date(self):
        return self.__seamless__.get_single("created_date")

    @property
    def created_timestamp(self):
        """created_date passed through coerce.to_datestamp()."""
        return self.__seamless__.get_single("created_date", coerce=coerce.to_datestamp())

    def set_last_updated(self, date=None):
        """Set last_updated, defaulting to dates.now()."""
        if date is None:
            date = dates.now()
        self.__seamless__.set_with_struct("last_updated", date)

    @property
    def last_updated(self):
        return self.__seamless__.get_single("last_updated")

    @property
    def last_updated_timestamp(self):
        """last_updated passed through coerce.to_datestamp()."""
        return self.__seamless__.get_single("last_updated", coerce=coerce.to_datestamp())

    def last_updated_since(self, days=0):
        """
        Was this record last updated within the past ``days`` days?

        :return: False when the record has no last_updated value, otherwise the result of
            comparing the timestamp with (utcnow - days)
        """
        stamp = self.last_updated_timestamp
        if stamp is None:
            # mirrors has_been_manually_updated(): a missing timestamp means "not updated"
            # rather than a TypeError from comparing None with a datetime
            return False
        return stamp > (datetime.utcnow() - timedelta(days=days))

    def set_last_manual_update(self, date=None):
        """Set last_manual_update, defaulting to dates.now()."""
        if date is None:
            date = dates.now()
        self.__seamless__.set_with_struct("last_manual_update", date)

    @property
    def last_manual_update(self):
        return self.__seamless__.get_single("last_manual_update")

    @property
    def last_manual_update_timestamp(self):
        """last_manual_update passed through coerce.to_datestamp()."""
        return self.__seamless__.get_single("last_manual_update", coerce=coerce.to_datestamp())

    def has_been_manually_updated(self):
        """True if last_manual_update is set and later than the unix epoch."""
        lmut = self.last_manual_update_timestamp
        if lmut is None:
            return False
        return lmut > datetime.utcfromtimestamp(0)

    def has_seal(self):
        return self.__seamless__.get_single("admin.seal", default=False)

    def set_seal(self, value):
        self.__seamless__.set_with_struct("admin.seal", value)

    def has_oa_start_date(self):
        """Return the value of bibjson.oa_start, or False if it is not set."""
        return self.__seamless__.get_single("bibjson.oa_start", default=False)

    @property
    def owner(self):
        return self.__seamless__.get_single("admin.owner")

    def set_owner(self, owner):
        self.__seamless__.set_with_struct("admin.owner", owner)

    def remove_owner(self):
        self.__seamless__.delete("admin.owner")

    @property
    def owner_account(self):
        """The Account pulled by owner id, or None if no owner is set."""
        if self.owner:
            return Account.pull(self.owner)
        return None

    @property
    def editor_group(self):
        return self.__seamless__.get_single("admin.editor_group")

    def set_editor_group(self, eg):
        self.__seamless__.set_with_struct("admin.editor_group", eg)

    def remove_editor_group(self):
        self.__seamless__.delete("admin.editor_group")

    @property
    def editor(self):
        return self.__seamless__.get_single("admin.editor")

    def set_editor(self, ed):
        self.__seamless__.set_with_struct("admin.editor", ed)

    def remove_editor(self):
        self.__seamless__.delete('admin.editor')

    @property
    def contact(self):
        return self.__seamless__.get_single("admin.contact")

    @property
    def contact_name(self):
        return self.__seamless__.get_single("admin.contact.name")

    @contact_name.setter
    def contact_name(self, name):
        self.__seamless__.set_with_struct("admin.contact.name", name)

    @property
    def contact_email(self):
        return self.__seamless__.get_single("admin.contact.email")

    @contact_email.setter
    def contact_email(self, email):
        self.__seamless__.set_with_struct("admin.contact.email", email)

    def set_contact(self, name, email):
        """Set both the contact name and the contact email."""
        self.contact_name = name
        self.contact_email = email

    def remove_contact(self):
        self.__seamless__.delete("admin.contact")

    def add_note(self, note, date=None, id=None):
        """
        Add a note to admin.notes.

        :param note: the note text
        :param date: note date; defaults to dates.now()
        :param id: note id; when not supplied a uuid4 is assigned
        """
        if not date:
            date = dates.now()
        obj = {"date": date, "note": note, "id": id}
        # remove any existing entry matching this one before (re-)adding it
        self.__seamless__.delete_from_list("admin.notes", matchsub=obj)
        if not id:
            obj["id"] = uuid.uuid4()
        self.__seamless__.add_to_list_with_struct("admin.notes", obj)

    def remove_note(self, note):
        self.__seamless__.delete_from_list("admin.notes", matchsub=note)

    def set_notes(self, notes):
        self.__seamless__.set_with_struct("admin.notes", notes)

    def remove_notes(self):
        self.__seamless__.delete("admin.notes")

    @property
    def notes(self):
        return self.__seamless__.get_list("admin.notes")

    @property
    def ordered_notes(self):
        """Orders notes by newest first"""
        notes = self.notes
        clusters = {}
        for note in notes:
            if "date" not in note:
                note["date"] = "1970-01-01T00:00:00Z"   # this really means something is broken with note date setting, which needs to be fixed
            clusters.setdefault(note["date"], []).append(note)

        ordered_keys = sorted(clusters.keys(), reverse=True)
        ordered = []
        for key in ordered_keys:
            # within a single date, the most recently stored note comes first
            clusters[key].reverse()
            ordered += clusters[key]
        return ordered

    def bibjson(self):
        """Return a JournalLikeBibJSON wrapper over the "bibjson" section, creating the section if absent."""
        bj = self.__seamless__.get_single("bibjson")
        if bj is None:
            self.__seamless__.set_single("bibjson", {})
            bj = self.__seamless__.get_single("bibjson")
        return JournalLikeBibJSON(bj)

    def set_bibjson(self, bibjson):
        """Set the "bibjson" section from a JournalLikeBibJSON (its .data is used) or any other value."""
        bibjson = bibjson.data if isinstance(bibjson, JournalLikeBibJSON) else bibjson
        self.__seamless__.set_with_struct("bibjson", bibjson)

    ######################################################
    ## DEPRECATED METHODS

    def known_issns(self):
        """
        DEPRECATED

        all issns this journal is known by

        This used to mean "all issns the journal has ever been known by", but that definition has changed since
        continuations have been separated from the single journal object model.

        Now this is just a proxy for self.bibjson().issns()
        """
        return self.bibjson().issns()

    def get_latest_contact_name(self):
        return self.contact_name

    def get_latest_contact_email(self):
        return self.contact_email

    def add_contact(self, name, email):
        self.set_contact(name, email)

    def remove_contacts(self):
        self.remove_contact()

    ######################################################
    ## internal utility methods

    def _generate_index(self):
        """Build the "index" section from the current bibjson and admin data and store it on the record."""
        # the index fields we are going to generate
        titles = []
        subjects = []
        schema_subjects = []
        schema_codes = []
        classification = []
        licenses = []
        unpunctitle = None
        asciiunpunctitle = None
        continued = "No"
        has_editor_group = "No"
        has_editor = "No"

        # the places we're going to get those fields from
        cbib = self.bibjson()

        # get the title out of the current bibjson
        if cbib.title is not None:
            titles.append(cbib.title)
        if cbib.alternative_title:
            titles.append(cbib.alternative_title)

        # get the subjects and concatenate them with their schemes from the current bibjson
        for subs in cbib.subject:
            scheme = subs.get("scheme")
            term = subs.get("term")
            subjects.append(term)
            schema_subjects.append(scheme + ":" + term)
            classification.append(term)
            if "code" in subs:
                schema_codes.append(scheme + ":" + subs.get("code"))

        # now expand the classification to hold all its parent terms too
        additional = []
        for c in classification:
            tp = cbib.term_path(c)
            if tp is not None:
                additional += tp
        classification += additional

        # add the keywords to the non-schema subjects (but not the classification)
        subjects += cbib.keywords

        # get the bibjson object to convert the languages to the english form
        langs = cbib.language_name()

        # get the english name of the country
        country = cbib.country_name()

        # get the type of the licenses
        for licence in cbib.licences:
            licenses.append(licence.get("type"))

        # deduplicate the lists
        titles = list(set(titles))
        subjects = list(set(subjects))
        schema_subjects = list(set(schema_subjects))
        classification = list(set(classification))
        licenses = list(set(licenses))
        schema_codes = list(set(schema_codes))

        # determine if the seal is applied
        has_seal = "Yes" if self.has_seal() else "No"

        # get the full classification paths for the subjects
        classification_paths = cbib.lcc_paths()
        schema_codes_tree = cbib.lcc_codes_full_list()

        # create an unpunctitle
        if cbib.title is not None:
            throwlist = string.punctuation + '\n\t'
            unpunctitle = "".join(c for c in cbib.title if c not in throwlist).strip()
            try:
                asciiunpunctitle = unidecode(unpunctitle)
            except Exception:
                # best effort: fall back to the un-transliterated title, but do not
                # swallow KeyboardInterrupt/SystemExit as a bare except would
                asciiunpunctitle = unpunctitle

        # record if this journal object is a continuation
        if len(cbib.replaces) > 0 or len(cbib.is_replaced_by) > 0:
            continued = "Yes"

        if self.editor_group is not None:
            has_editor_group = "Yes"

        if self.editor is not None:
            has_editor = "Yes"

        # build the index part of the object
        index = {}

        if country is not None:
            index["country"] = country
        if has_seal:
            index["has_seal"] = has_seal
        if unpunctitle is not None:
            index["unpunctitle"] = unpunctitle
        if asciiunpunctitle is not None:
            index["asciiunpunctitle"] = asciiunpunctitle
        index["continued"] = continued
        index["has_editor_group"] = has_editor_group
        index["has_editor"] = has_editor

        index["issn"] = cbib.issns()
        if len(titles) > 0:
            index["title"] = titles
        if len(subjects) > 0:
            index["subject"] = subjects
        if len(schema_subjects) > 0:
            index["schema_subject"] = schema_subjects
        if len(classification) > 0:
            index["classification"] = classification
        if len(langs) > 0:
            index["language"] = langs
        if len(licenses) > 0:
            index["license"] = licenses
        if len(classification_paths) > 0:
            index["classification_paths"] = classification_paths
        if len(schema_codes) > 0:
            index["schema_code"] = schema_codes
        if len(schema_codes_tree) > 0:
            index["schema_codes_tree"] = schema_codes_tree

        self.__seamless__.set_with_struct("index", index)
class Journal(JournalLikeObject):
    """
    The journal domain object (type "journal").

    Adds journal-specific queries, snapshotting to JournalHistory, continuation handling,
    tick calculation and the prep/save lifecycle on top of JournalLikeObject.
    """
    __type__ = "journal"

    __SEAMLESS_STRUCT__ = [
        shared_structs.JOURNAL_BIBJSON,
        shared_structs.SHARED_JOURNAL_LIKE,
        JOURNAL_STRUCT
    ]

    __SEAMLESS_COERCE__ = COERCE_MAP

    def __init__(self, **kwargs):
        # FIXME: hack, to deal with ES integration layer being improperly abstracted
        if "_source" in kwargs:
            kwargs = kwargs["_source"]
        # FIXME: I have taken this out for the moment, as I'm not sure it's what we should be doing
        #if kwargs:
        #    self.add_autogenerated_fields(**kwargs)
        super(Journal, self).__init__(raw=kwargs)

    @classmethod
    def add_autogenerated_fields(cls, **kwargs):
        """
        Fill in the derived has_*/author_retains flags on kwargs["bibjson"], in place.

        Not currently called from __init__ (see the FIXME there).
        """
        bib = kwargs["bibjson"]
        if "apc" in bib and bib["apc"] != '':
            bib["apc"]["has_apc"] = len(bib["apc"]["max"]) != 0
        else:
            bib["apc"] = {"has_apc": False}
        if "deposit_policy" in bib and bib["deposit_policy"] != []:
            bib["deposit_policy"]["has_policy"] = True
        else:
            ##change made in https://github.com/DOAJ/doaj/commit/e507123f423fe16fd270744055da0129e2b32005
            bib["deposit_policy"] = {"has_policy": False}
        if "other_charges" in bib and bib["other_charges"] != '':
            bib["other_charges"]["has_other_charges"] = bib["other_charges"]["url"] is not None
        else:
            bib["other_charges"] = {"has_other_charges": False}
        if "copyright" in bib and bib["copyright"]["url"] != '':
            bib["copyright"]["author_retains"] = bib["copyright"]["url"] is not None
        else:
            bib["copyright"] = {"author_retains": False}
        if "pid_scheme" in bib and bib["pid_scheme"] != '':
            bib["pid_scheme"]["has_pid_scheme"] = len(bib["pid_scheme"]["scheme"]) != 0
        else:
            bib["pid_scheme"] = {"has_pid_scheme": False}
        if "preservation" in bib and bib["preservation"] != '':
            bib["preservation"]["has_preservation"] = (len(bib["preservation"]) != 0 or
                                                       bib["national_library"] is not None)
        else:
            # NOTE(review): every other fallback above sets its flag to False; True here
            # looks inverted - confirm before relying on it
            bib["preservation"] = {"has_preservation": True}

    #####################################################
    ## Journal-specific data access methods

    @classmethod
    def all_in_doaj(cls, page_size=5000):
        """Iterate over all records with admin.in_doaj set to True."""
        q = JournalQuery()
        return cls.iterate(q.all_in_doaj(), page_size=page_size, wrap=True)

    @classmethod
    def find_by_publisher(cls, publisher, exact=True):
        """Return Journal objects matching the publisher name (see PublisherQuery)."""
        q = PublisherQuery(publisher, exact)
        result = cls.query(q=q.query())
        records = [Journal(**r.get("_source")) for r in result.get("hits", {}).get("hits", [])]
        return records

    @classmethod
    def find_by_title(cls, title):
        """Return Journal objects whose index.title.exact matches ``title``."""
        q = TitleQuery(title)
        result = cls.query(q=q.query())
        records = [Journal(**r.get("_source")) for r in result.get("hits", {}).get("hits", [])]
        return records

    @classmethod
    def delete_selected(cls, query, articles=False, snapshot_journals=True, snapshot_articles=True):
        """
        Delete all journals matching ``query``.

        :param articles: also delete the articles carrying the ISSNs of the matched journals
        :param snapshot_journals: snapshot each journal before deleting
        :param snapshot_articles: passed to Article.delete_by_issns as ``snapshot``
        """
        if articles:
            # list the issns of all the journals
            issns = cls.issns_by_query(query)

            # issue a delete request over all the articles by those issns
            from portality.models import Article
            Article.delete_by_issns(issns, snapshot=snapshot_articles)

        # snapshot the journal record
        if snapshot_journals:
            js = cls.iterate(query, page_size=1000)
            for j in js:
                j.snapshot()

        # finally issue a delete request against the journals
        cls.delete_by_query(query)

    def all_articles(self):
        """Return the articles found by this journal's known ISSNs."""
        from portality.models import Article
        return Article.find_by_issns(self.known_issns())

    def article_stats(self):
        """
        Count the in-DOAJ articles for this journal's ISSNs.

        :return: {"total": <hit count>, "latest": <created_date of the newest article, or None>}
        """
        from portality.models import Article
        q = ArticleStatsQuery(self.known_issns())
        data = Article.query(q=q.query())
        hits = data.get("hits", {})
        total = hits.get("total", {}).get('value', 0)
        latest = None
        if total > 0:
            latest = hits.get("hits", [])[0].get("_source").get("created_date")
        return {
            "total": total,
            "latest": latest
        }

    def mappings(self):
        return es_data_mapping.create_mapping(self.__seamless_struct__.raw, MAPPING_OPTS)

    ############################################
    ## base property methods

    @property
    def toc_id(self):
        """The preferred ISSN from the bibjson, falling back to the record id."""
        id_ = self.bibjson().get_preferred_issn()
        if not id_:
            id_ = self.id
        return id_

    @property
    def last_update_request(self):
        """
        date_accepted of the most recently accepted related application.

        :return: None if there are no related applications; otherwise the greatest
            date_accepted (records without one count as "1970-01-01T00:00:00Z")
        """
        related = self.related_applications
        if len(related) == 0:
            return None
        # the previous implementation called sorted() and discarded the result, so it always
        # reported the first stored record; select the latest one explicitly instead
        latest = max(related, key=lambda x: x.get("date_accepted", "1970-01-01T00:00:00Z"))
        return latest.get("date_accepted", "1970-01-01T00:00:00Z")

    ############################################################
    ## revision history methods

    def snapshot(self):
        """Save a JournalHistory copy of this record, with "id" moved to "about" and index/date fields dropped."""
        from portality.models import JournalHistory

        snap = deepcopy(self.data)
        if "id" in snap:
            snap["about"] = snap["id"]
            del snap["id"]
        if "index" in snap:
            del snap["index"]
        if "last_updated" in snap:
            del snap["last_updated"]
        if "created_date" in snap:
            del snap["created_date"]

        hist = JournalHistory(**snap)
        hist.save()

    #######################################################################
    ## Conversion methods

    def make_continuation(self, type, eissn=None, pissn=None, title=None):
        """
        Create, save and return a new Journal that continues (or is continued by) this one.

        :param type: "replaces" or "is_replaced_by", expressed relative to THIS journal
        :param eissn: electronic ISSN of the continuation
        :param pissn: print ISSN of the continuation
        :param title: title for the continuation; the current title is kept when None
        :raises ContinuationException: if ``type`` is unknown or both ISSNs are None
        :return: the saved continuation Journal
        """
        # check that the type is one we know. Must be either 'replaces' or 'is_replaced_by'
        if type not in ["replaces", "is_replaced_by"]:
            raise ContinuationException("type must be one of 'replaces' or 'is_replaced_by'")

        if eissn is None and pissn is None:
            raise ContinuationException("You must create a continuation with at least one issn")

        # take a copy of the raw data for this journal, and the issns for this journal
        raw_cont = deepcopy(self.data)
        bibjson = self.bibjson()
        issns = bibjson.issns()
        cissns = []

        # make a new instance of the journal - this will be our continuation
        del raw_cont["id"]
        del raw_cont["created_date"]
        del raw_cont["last_updated"]
        j = Journal(**raw_cont)

        # ensure that the journal is NOT in doaj. That will be for the admin to decide
        j.set_in_doaj(False)

        # get a copy of the continuation's bibjson, then remove the existing issns
        cbj = j.bibjson()
        del cbj.eissn
        del cbj.pissn

        # also remove any existing continuation information
        del cbj.replaces
        del cbj.is_replaced_by
        del cbj.discontinued_date

        # now write the new identifiers
        if eissn is not None and eissn != "":
            cissns.append(eissn)
            cbj.eissn = eissn
        if pissn is not None and pissn != "":
            cissns.append(pissn)
            cbj.pissn = pissn

        # update the title
        if title is not None:
            cbj.title = title

        # now add the issns of the original journal in the appropriate field
        #
        # This is a bit confusing - because we're asking this of a Journal object, the relationship type we're asking
        # for relates to this journal, not to the continuation we are creating. This means that when setting the
        # new continuations properties, we have to do the opposite to what we do to the journal's properties
        #
        # "replaces" means that the current journal replaces the new continuation
        if type == "replaces":
            bibjson.replaces = cissns
            cbj.is_replaced_by = issns

        # "is_replaced_by" means that the current journal is replaced by the new continuation
        elif type == "is_replaced_by":
            bibjson.is_replaced_by = cissns
            cbj.replaces = issns

        # save this journal
        self.save()

        # save the continuation, and return a copy to the caller
        j.save()
        return j

    ####################################################
    ## admin data methods

    def is_in_doaj(self):
        return self.__seamless__.get_single("admin.in_doaj", default=False)

    def set_in_doaj(self, value):
        self.__seamless__.set_with_struct("admin.in_doaj", value)

    def is_ticked(self):
        return self.__seamless__.get_single("admin.ticked", default=False)

    def set_ticked(self, ticked):
        self.__seamless__.set_with_struct("admin.ticked", ticked)

    @property
    def current_application(self):
        return self.__seamless__.get_single("admin.current_application")

    def set_current_application(self, application_id):
        self.__seamless__.set_with_struct("admin.current_application", application_id)

    def remove_current_application(self):
        self.__seamless__.delete("admin.current_application")

    @property
    def related_applications(self):
        return self.__seamless__.get_list("admin.related_applications")

    def add_related_application(self, application_id, date_accepted=None, status=None):
        """Add a related application record, replacing any existing record for the same application_id."""
        obj = {"application_id": application_id}
        self.__seamless__.delete_from_list("admin.related_applications", matchsub=obj)
        if date_accepted is not None:
            obj["date_accepted"] = date_accepted
        if status is not None:
            obj["status"] = status
        self.__seamless__.add_to_list_with_struct("admin.related_applications", obj)

    def set_related_applications(self, related_applications_records):
        self.__seamless__.set_with_struct("admin.related_applications", related_applications_records)

    def remove_related_applications(self):
        self.__seamless__.delete("admin.related_applications")

    def remove_related_application(self, application_id):
        """Remove the related application record(s) with the given application_id."""
        self.set_related_applications([r for r in self.related_applications if r.get("application_id") != application_id])

    def related_application_record(self, application_id):
        """Return the related application record with the given id, or None."""
        for record in self.related_applications:
            if record.get("application_id") == application_id:
                return record
        return None

    def latest_related_application_id(self):
        """
        application_id of the most recently accepted related application.

        :return: None if there are no related applications; otherwise the id of the record with
            the greatest date_accepted (records without one count as "1970-01-01T00:00:00Z")
        """
        related = self.related_applications
        if len(related) == 0:
            return None
        if len(related) == 1:
            return related[0].get("application_id")
        # the previous implementation discarded the result of sorted(); pick the latest explicitly
        latest = max(related, key=lambda x: x.get("date_accepted", "1970-01-01T00:00:00Z"))
        return latest.get("application_id")

    ########################################################################
    ## Functions for handling continuations

    def get_future_continuations(self):
        """Return the journals that replace this one, followed (recursively) by their own replacements."""
        irb = self.bibjson().is_replaced_by
        q = ContinuationQuery(irb)

        journals = self.q2obj(q=q.query())
        subjournals = []
        for j in journals:
            subjournals += j.get_future_continuations()

        return journals + subjournals

    def get_past_continuations(self):
        """Return the journals this one replaces, followed (recursively) by the journals they replace."""
        replaces = self.bibjson().replaces
        q = ContinuationQuery(replaces)

        journals = self.q2obj(q=q.query())
        subjournals = []
        for j in journals:
            subjournals += j.get_past_continuations()

        return journals + subjournals

    #######################################################################

    #####################################################
    ## operations we can do to the journal

    def calculate_tick(self):
        """
        Set admin.ticked from the created date / last update request and the TICK_THRESHOLD config value.

        An unsaved record (no created_date) is ticked if the threshold is not in the future. A saved
        record is ticked only if it is in DOAJ and was created, or last had an update request
        accepted, on or after the threshold.
        """
        created_date = self.created_date
        last_update_request = self.last_update_request

        tick_threshold = app.config.get("TICK_THRESHOLD", '2014-03-19T00:00:00Z')
        threshold = datetime.strptime(tick_threshold, "%Y-%m-%dT%H:%M:%SZ")

        if created_date is None:    # don't worry about the last_update_request date - you can't update unless you've been created!
            # we haven't even saved the record yet.  All we need to do is check that the tick
            # threshold is in the past (which I suppose theoretically it could not be), then
            # set it
            if datetime.utcnow() >= threshold:
                self.set_ticked(True)
            else:
                self.set_ticked(False)
            return

        # otherwise, this is an existing record, and we just need to update it

        # convert the strings to datetime objects
        created = datetime.strptime(created_date, "%Y-%m-%dT%H:%M:%SZ")
        lud = None
        if last_update_request is not None:
            lud = datetime.strptime(last_update_request, "%Y-%m-%dT%H:%M:%SZ")

        if created >= threshold and self.is_in_doaj():
            self.set_ticked(True)
            return

        if lud is not None and lud >= threshold and self.is_in_doaj():
            self.set_ticked(True)
            return

        self.set_ticked(False)

    def propagate_in_doaj_status_to_articles(self):
        """Copy this journal's in_doaj status onto each of its articles and save them."""
        for article in self.all_articles():
            article.set_in_doaj(self.is_in_doaj())
            article.save()

    def prep(self, is_update=True):
        """Recalculate the derived fields (in_doaj default, tick, index, has_apc, autocompletes) ahead of saving."""
        self._ensure_in_doaj()
        self.calculate_tick()
        self._generate_index()
        self._calculate_has_apc()
        self._generate_autocompletes()
        if is_update:
            self.set_last_updated()

    def save(self, snapshot=True, sync_owner=True, **kwargs):
        """
        Prepare, validate and save the journal.

        :param snapshot: store a JournalHistory snapshot after saving
        :param sync_owner: copy this journal's owner to its current application before saving
        :param kwargs: passed through to the parent save()
        :return: whatever the parent save() returns
        """
        self.prep()
        self.verify_against_struct()
        if sync_owner:
            self._sync_owner_to_application()
        res = super(Journal, self).save(**kwargs)
        if snapshot:
            self.snapshot()
        return res

    ######################################################
    ## internal utility methods

    def _generate_autocompletes(self):
        """Write lower-cased publisher and institution values to the index autocomplete fields."""
        bj = self.bibjson()
        publisher = bj.publisher
        institution = bj.institution

        if publisher is not None:
            self.__seamless__.set_with_struct("index.publisher_ac", publisher.lower())

        if institution is not None:
            self.__seamless__.set_with_struct("index.institution_ac", institution.lower())

    def _ensure_in_doaj(self):
        """Default admin.in_doaj to False when it has not been set."""
        if self.__seamless__.get_single("admin.in_doaj", default=None) is None:
            self.set_in_doaj(False)

    def _sync_owner_to_application(self):
        """If the current application exists and has a different owner, set it to this journal's owner and save it."""
        if self.current_application is None:
            return
        from portality.models.v2.application import Application
        ca = Application.pull(self.current_application)
        if ca is not None and ca.owner != self.owner:
            ca.set_owner(self.owner)
            ca.save(sync_owner=False)

    def _calculate_has_apc(self):
        """Set index.has_apc to "Yes", "No" or "No Information"."""
        # work out of the journal has an apc
        has_apc = "No Information"
        apc_present = self.bibjson().has_apc
        if apc_present:
            has_apc = "Yes"
        elif self.is_ticked():        # Because if an item is not ticked we want to say "No Information"
            has_apc = "No"

        self.__seamless__.set_with_struct("index.has_apc", has_apc)
# Options passed to es_data_mapping.create_mapping() by Journal.mappings().
MAPPING_OPTS = {
    "dynamic": None,
    "coerces": app.config["DATAOBJ_TO_MAPPING_DEFAULTS"],
    # per-field overrides of the generated mapping
    "exceptions": {
        "admin.notes.note": {
            "type": "text",
            "index": False,
            # "include_in_all": False  # Removed in es6 fixme: do we need to look at copy_to for the mapping?
        }
    }
}
895########################################################
896## Data Access Queries
class JournalQuery(object):
    """
    Builder for the queries we run against the journal type.

    find_by_issn() stores its result on ``self.query``; all_in_doaj() returns the query body.
    """
    # template: records matching any of a list of ISSNs (placeholder replaced per call)
    issn_query = {
        "track_total_hits": True,
        "query": {
            "bool": {
                "must": [
                    {"terms": {"index.issn.exact": "<issn>"}}
                ]
            }
        }
    }

    # template: every record flagged as in DOAJ
    all_doaj = {
        "track_total_hits": True,
        "query": {
            "bool": {
                "must": [
                    {"term": {"admin.in_doaj": True}}
                ]
            }
        }
    }

    _minified_fields = ["id", "bibjson.title", "last_updated"]

    def __init__(self, minified=False, sort_by_title=False):
        self.query = None
        self.minified = minified
        self.sort_by_title = sort_by_title

    def find_by_issn(self, issns, in_doaj=None, max=10):
        """Populate ``self.query`` with an ISSN lookup, optionally filtered by in_doaj, limited to ``max`` hits."""
        self.query = deepcopy(self.issn_query)
        must_clauses = self.query["query"]["bool"]["must"]
        must_clauses[0]["terms"]["index.issn.exact"] = issns
        if in_doaj is not None:
            must_clauses.append({"term": {"admin.in_doaj": in_doaj}})
        self.query["size"] = max

    def all_in_doaj(self):
        """Return the in-DOAJ query, with the minified field list and/or title sort applied as configured."""
        body = deepcopy(self.all_doaj)
        if self.minified:
            body["fields"] = self._minified_fields
        if self.sort_by_title:
            body["sort"] = [{"bibjson.title.exact": {"order": "asc"}}]
        return body
class JournalURLQuery(object):
    """Query for records by journal URL, optionally restricted by in_doaj status."""

    def __init__(self, url, in_doaj=None, max=10):
        self.url = url
        self.in_doaj = in_doaj
        self.max = max

    def query(self):
        """Build the query body: a match on the journal URL, plus an in_doaj term when one was given."""
        must_clauses = [
            {"match": {"bibjson.ref.journal.exact": self.url}}
        ]
        if self.in_doaj is not None:
            must_clauses.append({"term": {"admin.in_doaj": self.in_doaj}})
        return {
            "track_total_hits": True,
            "query": {
                "bool": {
                    "must": must_clauses
                }
            },
            "size": self.max
        }
class IssnQuery(object):
    """Aggregation query listing the ISSNs of records belonging to an owner."""

    def __init__(self, owner, in_doaj=None):
        self._owner = owner
        self._in_doaj = in_doaj

    def query(self):
        """Build the query body: no hits are requested, only the "issns" terms aggregation."""
        owner_clause = {"term": {"admin.owner.exact": self._owner}}
        if self._in_doaj is None:
            must_clauses = [owner_clause]
        else:
            must_clauses = [owner_clause, {"term": {"admin.in_doaj": self._in_doaj}}]

        issn_aggregation = {
            "terms": {
                "field": "index.issn.exact",
                "size": 10000,
                "order": {"_key": "asc"}
            }
        }
        return {
            "track_total_hits": True,
            "query": {"bool": {"must": must_clauses}},
            "size": 0,
            "aggs": {"issns": issn_aggregation}
        }
class OwnerQuery(object):
    """ Query to supply all full journal sources by owner """

    # template; the owner placeholder is replaced on a per-instance copy
    base_query = {
        "track_total_hits": True,
        "query": {
            "term": {"admin.owner.exact": "<owner id here>"}
        },
        "size": 10000,
    }

    def __init__(self, owner):
        body = deepcopy(self.base_query)
        body["query"]["term"]["admin.owner.exact"] = owner
        self._query = body

    def query(self):
        """Return the query body built at construction time."""
        return self._query
class PublisherQuery(object):
    """Query for records by publisher name, against either the exact or the plain field."""

    # template for a term query on the ".exact" publisher name field
    exact_query = {
        "track_total_hits": True,
        "query": {
            "term": {"bibjson.publisher.name.exact": "<publisher name here>"}
        },
        "size": 10000
    }

    # template for a term query on the plain publisher name field
    inexact_query = {
        "track_total_hits": True,
        "query": {
            "term": {"bibjson.publisher.name": "<publisher name here>"}
        },
        "size": 10000
    }

    def __init__(self, publisher, exact=True):
        self.publisher = publisher
        self.exact = exact

    def query(self):
        """Build the query body; the inexact form lower-cases the publisher name."""
        if self.exact:
            body = deepcopy(self.exact_query)
            body["query"]["term"]["bibjson.publisher.name.exact"] = self.publisher
            return body

        body = deepcopy(self.inexact_query)
        body["query"]["term"]["bibjson.publisher.name"] = self.publisher.lower()
        return body
class TitleQuery(object):
    """Query for records whose index.title.exact equals a given title."""

    # template; the title placeholder is replaced on a copy in query()
    base_query = {
        "track_total_hits": True,
        "query": {
            "term": {"index.title.exact": "<title here>"}
        },
        "size": 10000
    }

    def __init__(self, title):
        self.title = title

    def query(self):
        """Build a fresh query body for the configured title."""
        body = deepcopy(self.base_query)
        body["query"]["term"]["index.title.exact"] = self.title
        return body
class ContinuationQuery(object):
    """Query for records carrying any of the given ISSNs (used to resolve continuations)."""

    def __init__(self, issns):
        self.issns = issns

    def query(self):
        """Build the query body: a terms match on index.issn.exact."""
        issn_clause = {"terms": {"index.issn.exact": self.issns}}
        return {
            "track_total_hits": True,
            "query": {"bool": {"must": [issn_clause]}},
            "size": 10000
        }
class ArticleStatsQuery(object):
    """Query for the newest in-DOAJ article across a set of ISSNs (used by Journal.article_stats)."""

    def __init__(self, issns):
        self.issns = issns

    def query(self):
        """Build the query body: one hit, created_date only, newest first."""
        must_clauses = [
            {"terms": {"index.issn.exact": self.issns}},
            {"term": {"admin.in_doaj": True}}
        ]
        return {
            "track_total_hits": True,
            "query": {"bool": {"must": must_clauses}},
            "size": 1,
            "_source": {"include": ["created_date"]},
            "sort": [{"created_date": {"order": "desc"}}]
        }
class RecentJournalsQuery(object):
    """Query for the most recently created records."""

    def __init__(self, max):
        self.max = max

    def query(self):
        """Build the query body: match everything, newest created_date first, limited to ``self.max``."""
        newest_first = [{"created_date": {"order": "desc"}}]
        return {
            "track_total_hits": True,
            "query": {"match_all": {}},
            "size": self.max,
            "sort": newest_first
        }