Coverage for portality/models/v1/journal.py: 0%
898 statements
« prev ^ index » next coverage.py v6.4.2, created at 2022-07-19 16:52 +0100
« prev ^ index » next coverage.py v6.4.2, created at 2022-07-19 16:52 +0100
1from portality.dao import DomainObject
2from portality.core import app
3from portality.models.v1.bibjson import GenericBibJSON
4from portality.models.v1 import shared_structs
5from portality.lib import dataobj, es_data_mapping, dates
6from portality import datasets
8from copy import deepcopy
9from datetime import datetime
11import string
12from unidecode import unidecode
class ContinuationException(Exception):
    """Raised when a journal continuation cannot be created from the supplied arguments."""
class JournalLikeObject(dataobj.DataObj, DomainObject):
    """
    Behaviour shared by journal-shaped records.

    Supplies lookup helpers, accessors for the top-level and ``admin`` fields, and generation of the
    ``index`` section.  No structs are registered here; that is left to the concrete subclass.
    """

    @classmethod
    def find_by_issn(cls, issns, in_doaj=None, max=10):
        """
        Find the records carrying any of the supplied ISSNs.

        :param issns: a single ISSN or a list of ISSNs
        :param in_doaj: passed straight through to JournalQuery.find_by_issn
        :param max: passed straight through to JournalQuery.find_by_issn
        :return: list of ``cls`` instances built from each hit's ``_source``
        """
        if not isinstance(issns, list):
            issns = [issns]
        q = JournalQuery()
        q.find_by_issn(issns, in_doaj=in_doaj, max=max)
        result = cls.query(q=q.query)
        # build the objects with cls rather than Journal, so that subclasses can use this too
        return [cls(**hit.get("_source")) for hit in result.get("hits", {}).get("hits", [])]

    @classmethod
    def issns_by_owner(cls, owner):
        """
        List the ISSNs found by the ``issns`` aggregation of IssnQuery for the given owner.

        :param owner: the owner handed to IssnQuery
        :return: list of bucket keys from the ``issns`` aggregation
        """
        q = IssnQuery(owner)
        res = cls.query(q=q.query())
        buckets = res.get("aggregations", {}).get("issns", {}).get("buckets", [])
        # aggregation buckets carry their value under "key"; the previous "keys" lookup
        # produced a list of None values
        return [bucket.get("key") for bucket in buckets]

    @classmethod
    def issns_by_query(cls, query):
        """
        Collect the known ISSNs of every record matched by ``query``.

        :param query: query handed to ``cls.iterate``
        :return: flat list of ISSNs (duplicates are not removed)
        """
        issns = []
        for j in cls.iterate(query):
            issns += j.known_issns()
        return issns

    ############################################
    ## base property methods

    @property
    def id(self):
        """The record's ``id`` field."""
        return self._get_single("id")

    def set_id(self, id=None):
        """Set the record id, generating one with ``makeid()`` when none is supplied."""
        if id is None:
            id = self.makeid()
        self._set_with_struct("id", id)

    def set_created(self, date=None):
        """Set ``created_date``; defaults to ``dates.now()``."""
        if date is None:
            date = dates.now()
        self._set_with_struct("created_date", date)

    @property
    def created_date(self):
        """The stored ``created_date`` value."""
        return self._get_single("created_date")

    @property
    def created_timestamp(self):
        """``created_date`` coerced with ``dataobj.to_datestamp()``."""
        return self._get_single("created_date", coerce=dataobj.to_datestamp())

    def set_last_updated(self, date=None):
        """Set ``last_updated``; defaults to ``dates.now()``."""
        if date is None:
            date = dates.now()
        self._set_with_struct("last_updated", date)

    @property
    def last_updated(self):
        """The stored ``last_updated`` value."""
        return self._get_single("last_updated")

    @property
    def last_updated_timestamp(self):
        """``last_updated`` coerced with ``dataobj.to_datestamp()``."""
        return self._get_single("last_updated", coerce=dataobj.to_datestamp())

    def bibjson(self):
        """
        Get the bibjson section wrapped in a JournalBibJSON.

        An empty bibjson section is created on the record first if none exists yet.
        """
        bj = self._get_single("bibjson")
        if bj is None:
            self._set_single("bibjson", {})
            bj = self._get_single("bibjson")
        return JournalBibJSON(bj)

    def set_bibjson(self, bibjson):
        """Replace the bibjson section; accepts a JournalBibJSON or a raw dictionary."""
        bibjson = bibjson.data if isinstance(bibjson, JournalBibJSON) else bibjson
        self._set_with_struct("bibjson", bibjson)

    def set_last_manual_update(self, date=None):
        """Set ``last_manual_update``; defaults to ``dates.now()``."""
        if date is None:
            date = dates.now()
        self._set_with_struct("last_manual_update", date)

    @property
    def last_manual_update(self):
        """The stored ``last_manual_update`` value."""
        return self._get_single("last_manual_update")

    @property
    def last_manual_update_timestamp(self):
        """``last_manual_update`` coerced with ``dataobj.to_datestamp()``."""
        return self._get_single("last_manual_update", coerce=dataobj.to_datestamp())

    def has_been_manually_updated(self):
        """
        Whether a manual update later than the epoch has been recorded.

        :return: False when no manual update timestamp is available at all
        """
        stamp = self.last_manual_update_timestamp
        if stamp is None:
            # comparing None with a datetime raises TypeError; no timestamp means "never"
            return False
        return stamp > datetime.utcfromtimestamp(0)

    def contacts(self):
        """The raw ``admin.contact`` value."""
        return self._get_single("admin.contact")

    def _latest_contact(self):
        """Return the last contact record, or None when there are no contacts."""
        contacts = self.contacts()
        if not contacts:
            return None
        return contacts[-1]

    def get_latest_contact_name(self):
        """Name of the most recently added contact, or "" when there is none."""
        contact = self._latest_contact()
        if contact is None:
            return ""
        return contact.get("name", "")

    def get_latest_contact_email(self):
        """Email of the most recently added contact, or "" when there is none."""
        contact = self._latest_contact()
        if contact is None:
            return ""
        return contact.get("email", "")

    def add_contact(self, name, email):
        """Append a contact record to ``admin.contact``."""
        self._add_to_list_with_struct("admin.contact", {"name" : name, "email" : email})

    def remove_contacts(self):
        """Delete ``admin.contact`` entirely."""
        self._delete("admin.contact")

    def add_note(self, note, date=None):
        """
        Add a note to ``admin.notes``.

        :param note: the note text
        :param date: date of the note; defaults to ``dates.now()``
        """
        if date is None:
            date = dates.now()
        obj = {"date" : date, "note" : note}
        # remove any matching note first, so the same note is not stored twice
        self._delete_from_list("admin.notes", matchsub=obj)
        self._add_to_list_with_struct("admin.notes", obj)

    def remove_note(self, note):
        """Remove the notes matching ``note`` from ``admin.notes``."""
        self._delete_from_list("admin.notes", matchsub=note)

    def set_notes(self, notes):
        """Replace ``admin.notes`` with the supplied list."""
        self._set_with_struct("admin.notes", notes)

    def remove_notes(self):
        """Delete ``admin.notes`` entirely."""
        self._delete("admin.notes")

    @property
    def notes(self):
        """The list stored at ``admin.notes``."""
        return self._get_list("admin.notes")

    @property
    def ordered_notes(self):
        """
        Notes ordered by date, newest first.

        Notes which share a date come out in the reverse of their stored order.
        """
        # sorted() is stable (also with reverse=True), so reversing first yields exactly
        # the "reverse stored order within the same date" tie-break
        return sorted(reversed(self.notes), key=lambda note: note["date"], reverse=True)

    @property
    def owner(self):
        """The value of ``admin.owner``."""
        return self._get_single("admin.owner")

    def set_owner(self, owner):
        """Set ``admin.owner``."""
        self._set_with_struct("admin.owner", owner)

    def remove_owner(self):
        """Delete ``admin.owner``."""
        self._delete("admin.owner")

    @property
    def editor_group(self):
        """The value of ``admin.editor_group``."""
        return self._get_single("admin.editor_group")

    def set_editor_group(self, eg):
        """Set ``admin.editor_group``."""
        self._set_with_struct("admin.editor_group", eg)

    def remove_editor_group(self):
        """Delete ``admin.editor_group``."""
        self._delete("admin.editor_group")

    @property
    def editor(self):
        """The value of ``admin.editor``."""
        return self._get_single("admin.editor")

    def set_editor(self, ed):
        """Set ``admin.editor``."""
        self._set_with_struct("admin.editor", ed)

    def remove_editor(self):
        """Delete ``admin.editor``."""
        self._delete('admin.editor')

    def known_issns(self):
        """
        DEPRECATED

        all issns this journal is known by

        This used to mean "all issns the journal has ever been known by", but that definition has changed since
        continuations have been separated from the single journal object model.

        Now this is just a proxy for self.bibjson().issns()
        """
        return self.bibjson().issns()

    def has_seal(self):
        """The value of ``admin.seal`` (False when unset)."""
        return self._get_single("admin.seal", default=False)

    def set_seal(self, value):
        """Set ``admin.seal``."""
        self._set_with_struct("admin.seal", value)

    @property
    def bulk_upload_id(self):
        """The value of ``admin.bulk_upload``."""
        return self._get_single("admin.bulk_upload")

    def set_bulk_upload_id(self, bulk_upload_id):
        """Set ``admin.bulk_upload``."""
        self._set_with_struct("admin.bulk_upload", bulk_upload_id)

    ######################################################
    ## internal utility methods

    def _generate_index(self):
        """
        Rebuild the ``index`` section of the record from the current bibjson and admin data.

        List-valued fields are de-duplicated, and empty or missing values are left out of the index.
        ``has_seal``, ``continued``, ``has_editor_group`` and ``has_editor`` are always written as
        "Yes"/"No" strings.
        """
        # the index fields we are going to generate
        issns = []
        titles = []
        subjects = []
        schema_subjects = []
        schema_codes = []
        classification = []
        license = []
        publisher = []
        urls = {}
        unpunctitle = None
        asciiunpunctitle = None

        # the place we're going to get those fields from
        cbib = self.bibjson()

        # get the issns out of the current bibjson
        issns += cbib.get_identifiers(cbib.P_ISSN)
        issns += cbib.get_identifiers(cbib.E_ISSN)

        # get the title out of the current bibjson
        if cbib.title is not None:
            titles.append(cbib.title)

        # get the subjects and concatenate them with their schemes from the current bibjson
        for subs in cbib.subjects():
            scheme = subs.get("scheme")
            term = subs.get("term")
            subjects.append(term)
            schema_subjects.append(scheme + ":" + term)
            classification.append(term)
            if "code" in subs:
                schema_codes.append(scheme + ":" + subs.get("code"))

        # add the keywords to the non-schema subjects (but not the classification)
        subjects += cbib.keywords

        # get the bibjson object to convert the languages to the english form
        langs = cbib.language_name()

        # get the english name of the country
        country = cbib.country_name()

        # get the title of the license
        lic = cbib.get_license()
        if lic is not None:
            license.append(lic.get("title"))

        # copy the publisher/institution
        if cbib.publisher:
            publisher.append(cbib.publisher)
        if cbib.institution:
            publisher.append(cbib.institution)

        # extract and convert all of the urls by their type
        for link in cbib.get_urls():
            lt = link.get("type")
            if lt is not None:
                urls[lt + "_url"] = link.get("url")

        # deduplicate the lists
        issns = list(set(issns))
        titles = list(set(titles))
        subjects = list(set(subjects))
        schema_subjects = list(set(schema_subjects))
        classification = list(set(classification))
        license = list(set(license))
        publisher = list(set(publisher))
        schema_codes = list(set(schema_codes))

        # determine if the seal is applied
        has_seal = "Yes" if self.has_seal() else "No"

        # get the full classification paths for the subjects
        classification_paths = cbib.lcc_paths()

        # create an unpunctitle
        if cbib.title is not None:
            throwlist = string.punctuation + '\n\t'
            unpunctitle = "".join(c for c in cbib.title if c not in throwlist).strip()
            try:
                asciiunpunctitle = unidecode(unpunctitle)
            except Exception:
                # best effort: fall back to the un-transliterated title, but do not swallow
                # KeyboardInterrupt/SystemExit as the previous bare except did
                asciiunpunctitle = unpunctitle

        # record if this journal object is a continuation
        is_continuation = len(cbib.replaces) > 0 or len(cbib.is_replaced_by) > 0
        continued = "Yes" if is_continuation else "No"
        has_editor_group = "Yes" if self.editor_group is not None else "No"
        has_editor = "Yes" if self.editor is not None else "No"

        # build the index part of the object
        index = {}
        if len(issns) > 0:
            index["issn"] = issns
        if len(titles) > 0:
            index["title"] = titles
        if len(subjects) > 0:
            index["subject"] = subjects
        if len(schema_subjects) > 0:
            index["schema_subject"] = schema_subjects
        if len(classification) > 0:
            index["classification"] = classification
        if len(publisher) > 0:
            index["publisher"] = publisher
        if len(license) > 0:
            index["license"] = license
        if len(langs) > 0:
            index["language"] = langs
        if country is not None:
            index["country"] = country
        if len(schema_codes) > 0:
            index["schema_code"] = schema_codes
        if len(urls) > 0:
            index.update(urls)
        # has_seal is always "Yes" or "No", so it is always indexed
        index["has_seal"] = has_seal
        if len(classification_paths) > 0:
            index["classification_paths"] = classification_paths
        if unpunctitle is not None:
            index["unpunctitle"] = unpunctitle
        if asciiunpunctitle is not None:
            index["asciiunpunctitle"] = asciiunpunctitle
        index["continued"] = continued
        index["has_editor_group"] = has_editor_group
        index["has_editor"] = has_editor
        self._set_with_struct("index", index)
class Journal(JournalLikeObject):
    """
    A journal record, stored under the ``journal`` type.

    Adds journal-specific queries, snapshotting, continuation handling, admin fields and the
    save-time preparation steps on top of JournalLikeObject.
    """
    __type__ = "journal"

    def __init__(self, **kwargs):
        """
        :param kwargs: the raw record; a search hit containing ``_source`` is also accepted
        """
        # FIXME: hack, to deal with ES integration layer being improperly abstracted
        if "_source" in kwargs:
            kwargs = kwargs["_source"]
        self._add_struct(shared_structs.SHARED_BIBJSON)
        self._add_struct(shared_structs.JOURNAL_BIBJSON_EXTENSION)
        self._add_struct(JOURNAL_STRUCT)
        super(Journal, self).__init__(raw=kwargs)

    #####################################################
    ## Journal-specific data access methods

    @classmethod
    def all_in_doaj(cls, page_size=5000):
        """Iterate over the records matched by JournalQuery.all_in_doaj()."""
        q = JournalQuery()
        return cls.iterate(q.all_in_doaj(), page_size=page_size, wrap=True)

    @classmethod
    def find_by_publisher(cls, publisher, exact=True):
        """
        Find journals by publisher.

        :param publisher: publisher handed to PublisherQuery
        :param exact: handed to PublisherQuery
        :return: list of Journal objects
        """
        q = PublisherQuery(publisher, exact)
        result = cls.query(q=q.query())
        return [Journal(**hit.get("_source")) for hit in result.get("hits", {}).get("hits", [])]

    @classmethod
    def find_by_title(cls, title):
        """
        Find journals by title.

        :param title: title handed to TitleQuery
        :return: list of Journal objects
        """
        q = TitleQuery(title)
        result = cls.query(q=q.query())
        return [Journal(**hit.get("_source")) for hit in result.get("hits", {}).get("hits", [])]

    @classmethod
    def delete_selected(cls, query, articles=False, snapshot_journals=True, snapshot_articles=True):
        """
        Delete every journal matched by ``query``.

        :param query: the query selecting the journals
        :param articles: also delete the articles carrying the ISSNs of those journals
        :param snapshot_journals: snapshot each journal before it is deleted
        :param snapshot_articles: passed to Article.delete_by_issns as ``snapshot``
        """
        if articles:
            # list the issns of all the journals
            issns = cls.issns_by_query(query)

            # issue a delete request over all the articles by those issns
            from portality.models import Article
            Article.delete_by_issns(issns, snapshot=snapshot_articles)

        # snapshot the journal record
        if snapshot_journals:
            for j in cls.iterate(query, page_size=1000):
                j.snapshot()

        # finally issue a delete request against the journals
        cls.delete_by_query(query)

    def all_articles(self):
        """The result of Article.find_by_issns for this journal's known ISSNs."""
        from portality.models import Article
        return Article.find_by_issns(self.known_issns())

    def article_stats(self):
        """
        Summarise the articles belonging to this journal.

        :return: ``{"total": <hit count>, "latest": <created_date of the first hit, or None>}``
        """
        from portality.models import Article
        q = ArticleStatsQuery(self.known_issns())
        data = Article.query(q=q.query())
        hits = data.get("hits", {})
        total = hits.get("total", {}).get('value', 0)
        records = hits.get("hits", [])
        latest = None
        # guard on the hit list as well as the total, so an empty page cannot raise IndexError
        if total > 0 and records:
            latest = records[0].get("_source").get("created_date")
        return {
            "total" : total,
            "latest" : latest
        }

    def mappings(self):
        """Create the mapping for this object's struct, using MAPPING_OPTS."""
        return es_data_mapping.create_mapping(self.get_struct(), MAPPING_OPTS)

    ############################################
    ## base property methods

    @property
    def toc_id(self):
        """
        Identifier for the table of contents: the electronic ISSN if there is one, otherwise the
        print ISSN, otherwise the record id.
        """
        bibjson = self.bibjson()
        id_ = bibjson.get_one_identifier(bibjson.E_ISSN)
        if not id_:
            id_ = bibjson.get_one_identifier(bibjson.P_ISSN)
        if not id_:
            id_ = self.id
        return id_

    @staticmethod
    def _accepted_date(record):
        """Sort key for related application records; records without a date count as the epoch."""
        return record.get("date_accepted", "1970-01-01T00:00:00Z")

    @property
    def last_update_request(self):
        """
        The most recent ``date_accepted`` among the related applications.

        :return: the date string, or None when there are no related applications
        """
        related = self.related_applications
        if len(related) == 0:
            return None
        # sorted() returns a new list; its result used to be thrown away, so the first stored
        # record was returned regardless of date
        return self._accepted_date(max(related, key=self._accepted_date))

    ############################################################
    ## revision history methods

    def snapshot(self):
        """
        Save a JournalHistory copy of the current data.

        The copy records this journal's id under ``about`` and drops ``id``, ``index``,
        ``last_updated`` and ``created_date``.
        """
        from portality.models import JournalHistory

        snap = deepcopy(self.data)
        if "id" in snap:
            snap["about"] = snap.pop("id")
        for field in ("index", "last_updated", "created_date"):
            snap.pop(field, None)

        hist = JournalHistory(**snap)
        hist.save()

    #######################################################################
    ## Conversion methods

    def make_continuation(self, type, eissn=None, pissn=None, title=None):
        """
        Create, save and return a new journal which continues (or is continued by) this one.

        :param type: "replaces" (this journal replaces the new one) or "is_replaced_by" (this journal is
            replaced by the new one)
        :param eissn: electronic ISSN of the new journal
        :param pissn: print ISSN of the new journal
        :param title: title of the new journal; this journal's title is kept when omitted
        :return: the newly created Journal
        :raises ContinuationException: on an unknown type, or when no usable ISSN is given
        """
        # check that the type is one we know.  Must be either 'replaces' or 'is_replaced_by'
        if type not in ["replaces", "is_replaced_by"]:
            raise ContinuationException("type must be one of 'replaces' or 'is_replaced_by'")

        # empty strings are ignored further down, so they must not count as an issn here either
        if not eissn and not pissn:
            raise ContinuationException("You must create a continuation with at least one issn")

        # take a copy of the raw data for this journal, and the issns for this journal
        raw_cont = deepcopy(self.data)
        bibjson = self.bibjson()
        issns = bibjson.issns()
        cissns = []

        # make a new instance of the journal - this will be our continuation.  The fields are
        # popped rather than deleted, as a journal which was never saved may not have them
        for field in ("id", "created_date", "last_updated"):
            raw_cont.pop(field, None)
        j = Journal(**raw_cont)

        # ensure that the journal is NOT in doaj.  That will be for the admin to decide
        j.set_in_doaj(False)

        # get the continuation's bibjson, then remove the existing issns
        cbj = j.bibjson()
        cbj.remove_identifiers(cbj.E_ISSN)
        cbj.remove_identifiers(cbj.P_ISSN)

        # also remove any existing continuation information
        del cbj.replaces
        del cbj.is_replaced_by
        del cbj.discontinued_date

        # now write the new identifiers
        if eissn:
            cissns.append(eissn)
            cbj.add_identifier(cbj.E_ISSN, eissn)
        if pissn:
            cissns.append(pissn)
            cbj.add_identifier(cbj.P_ISSN, pissn)

        # update the title
        if title is not None:
            cbj.title = title

        # now add the issns of the original journal in the appropriate field
        #
        # This is a bit confusing - because we're asking this of a Journal object, the relationship type we're asking
        # for relates to this journal, not to the continuation we are creating.  This means that when setting the
        # new continuation's properties, we have to do the opposite to what we do to the journal's properties
        if type == "replaces":
            # the current journal replaces the new continuation
            bibjson.replaces = cissns
            cbj.is_replaced_by = issns
        elif type == "is_replaced_by":
            # the current journal is replaced by the new continuation
            bibjson.is_replaced_by = cissns
            cbj.replaces = issns

        # save this journal
        self.save()

        # save the continuation, and return it to the caller
        j.save()
        return j

    ####################################################
    ## admin data methods

    def is_in_doaj(self):
        """The value of ``admin.in_doaj`` (False when unset)."""
        return self._get_single("admin.in_doaj", default=False)

    def set_in_doaj(self, value):
        """Set ``admin.in_doaj``."""
        self._set_with_struct("admin.in_doaj", value)

    @property
    def current_application(self):
        """The value of ``admin.current_application``."""
        return self._get_single("admin.current_application")

    def set_current_application(self, application_id):
        """Set ``admin.current_application``."""
        self._set_with_struct("admin.current_application", application_id)

    def remove_current_application(self):
        """Delete ``admin.current_application``."""
        self._delete("admin.current_application")

    @property
    def related_applications(self):
        """The list stored at ``admin.related_applications``."""
        return self._get_list("admin.related_applications")

    def add_related_application(self, application_id, date_accepted=None, status=None):
        """
        Record a related application, replacing any existing record for the same application id.

        :param application_id: id of the application
        :param date_accepted: stored only when supplied
        :param status: stored only when supplied
        """
        obj = {"application_id" : application_id}
        self._delete_from_list("admin.related_applications", matchsub=obj)
        if date_accepted is not None:
            obj["date_accepted"] = date_accepted
        if status is not None:
            obj["status"] = status
        self._add_to_list_with_struct("admin.related_applications", obj)

    def set_related_applications(self, related_applications_records):
        """Replace ``admin.related_applications`` with the supplied records."""
        self._set_with_struct("admin.related_applications", related_applications_records)

    def remove_related_applications(self):
        """Delete ``admin.related_applications``."""
        self._delete("admin.related_applications")

    def related_application_record(self, application_id):
        """Return the related application record with the given id, or None."""
        for record in self.related_applications:
            if record.get("application_id") == application_id:
                return record
        return None

    def latest_related_application_id(self):
        """
        The id of the related application with the most recent ``date_accepted``.

        :return: the application id, or None when there are no related applications
        """
        related = self.related_applications
        if len(related) == 0:
            return None
        # as in last_update_request: the discarded sorted() call meant the first stored record
        # was always chosen
        return max(related, key=self._accepted_date).get("application_id")

    def is_ticked(self):
        """The value of ``admin.ticked`` (False when unset)."""
        return self._get_single("admin.ticked", default=False)

    def set_ticked(self, ticked):
        """Set ``admin.ticked``."""
        self._set_with_struct("admin.ticked", ticked)

    ########################################################################
    ## Functions for handling continuations

    def get_future_continuations(self):
        """
        Recursively collect the journals found via ContinuationQuery for this journal's
        ``is_replaced_by`` ISSNs: the direct matches first, then their own future continuations.
        """
        q = ContinuationQuery(self.bibjson().is_replaced_by)
        journals = self.q2obj(q=q.query())
        subjournals = []
        for j in journals:
            subjournals += j.get_future_continuations()
        return journals + subjournals

    def get_past_continuations(self):
        """
        Recursively collect the journals found via ContinuationQuery for this journal's
        ``replaces`` ISSNs: the direct matches first, then their own past continuations.
        """
        q = ContinuationQuery(self.bibjson().replaces)
        journals = self.q2obj(q=q.query())
        subjournals = []
        for j in journals:
            subjournals += j.get_past_continuations()
        return journals + subjournals

    #######################################################################

    #####################################################
    ## operations we can do to the journal

    def calculate_tick(self):
        """
        Set ``admin.ticked`` from the creation / last update request dates and the TICK_THRESHOLD
        config value.

        An unsaved record (no created date) is ticked when the threshold is not in the future.  A
        saved record is ticked when it is in DOAJ and was either created, or had an update request
        accepted, on or after the threshold.
        """
        date_format = "%Y-%m-%dT%H:%M:%SZ"
        created_date = self.created_date
        last_update_request = self.last_update_request

        tick_threshold = app.config.get("TICK_THRESHOLD", '2014-03-19T00:00:00Z')
        threshold = datetime.strptime(tick_threshold, date_format)

        if created_date is None:
            # we haven't even saved the record yet, and you can't have an update request unless
            # you've been created, so only the threshold itself needs checking
            self.set_ticked(datetime.utcnow() >= threshold)
            return

        # otherwise, this is an existing record: convert the strings to datetime objects
        created = datetime.strptime(created_date, date_format)
        lud = None
        if last_update_request is not None:
            lud = datetime.strptime(last_update_request, date_format)

        recent_enough = created >= threshold or (lud is not None and lud >= threshold)
        self.set_ticked(bool(recent_enough and self.is_in_doaj()))

    def propagate_in_doaj_status_to_articles(self):
        """Copy this journal's in_doaj status onto each of its articles and save them."""
        for article in self.all_articles():
            article.set_in_doaj(self.is_in_doaj())
            article.save()

    def prep(self):
        """Run the derived-data steps, in order, that must precede a save."""
        self._ensure_in_doaj()
        self.calculate_tick()
        self._generate_index()
        self._calculate_has_apc()
        self._generate_autocompletes()
        self.set_last_updated()

    def save(self, snapshot=True, sync_owner=True, **kwargs):
        """
        Prepare, validate and save the journal.

        :param snapshot: take a history snapshot after saving
        :param sync_owner: copy the owner to the current application before saving
        :param kwargs: passed on to the parent ``save``
        :return: the result of the parent ``save``
        """
        self.prep()
        self.check_construct()
        if sync_owner:
            self._sync_owner_to_application()
        res = super(Journal, self).save(**kwargs)
        if snapshot:
            self.snapshot()
        return res

    ######################################################
    ## internal utility methods

    def _generate_autocompletes(self):
        """Write lower-cased publisher, institution and provider values to the ``*_ac`` index fields."""
        bj = self.bibjson()
        sources = (
            ("index.publisher_ac", bj.publisher),
            ("index.institution_ac", bj.institution),
            ("index.provider_ac", bj.provider),
        )
        for path, value in sources:
            if value is not None:
                self._set_with_struct(path, value.lower())

    def _calculate_has_apc(self):
        """
        Set ``index.has_apc``: "Yes" when the bibjson has apc data, otherwise "No" for a ticked
        journal and "No Information" for an unticked one.
        """
        if len(self.bibjson().apc) > 0:
            has_apc = "Yes"
        elif self.is_ticked():
            has_apc = "No"
        else:
            has_apc = "No Information"

        self._set_with_struct("index.has_apc", has_apc)

    def _ensure_in_doaj(self):
        """Take the journal out of DOAJ when its bibjson is not active."""
        # switching active to false takes the item out of the DOAJ
        # though note that switching active to True does not put something IN the DOAJ
        if not self.bibjson().active:
            self.set_in_doaj(False)

    def _sync_owner_to_application(self):
        """Copy this journal's owner onto its current application, if there is one and it differs."""
        if self.current_application is None:
            return
        from portality.models import Suggestion
        ca = Suggestion.pull(self.current_application)
        if ca is not None and ca.owner != self.owner:
            ca.set_owner(self.owner)
            # sync_owner=False stops the application syncing straight back to the journal
            ca.save(sync_owner=False)
761class JournalBibJSON(GenericBibJSON):
def __init__(self, bibjson=None):
    """
    Register the shared and journal-specific bibjson structs, then initialise from ``bibjson``.

    :param bibjson: the raw bibjson data to wrap, handed to the parent constructor
    """
    shared_struct = shared_structs.SHARED_BIBJSON.get("structs", {}).get("bibjson")
    self._add_struct(shared_struct)
    extension_struct = shared_structs.JOURNAL_BIBJSON_EXTENSION.get("structs", {}).get("bibjson")
    self._add_struct(extension_struct)
    super(JournalBibJSON, self).__init__(bibjson)
767 ############################################################
768 # journal-specific simple property getter and setters
@property
def alternative_title(self):
    """The journal's ``alternative_title`` field."""
    return self._get_single("alternative_title")

@alternative_title.setter
def alternative_title(self, value):
    """Store ``value`` as the ``alternative_title``."""
    self._set_with_struct("alternative_title", value)
@property
def author_pays_url(self):
    """The ``author_pays_url`` field."""
    return self._get_single("author_pays_url")

@author_pays_url.setter
def author_pays_url(self, value):
    """Store ``value`` as the ``author_pays_url``."""
    self._set_with_struct("author_pays_url", value)

@property
def author_pays(self):
    """The ``author_pays`` field."""
    return self._get_single("author_pays")

@author_pays.setter
def author_pays(self, value):
    """Store ``value`` as ``author_pays``."""
    self._set_with_struct("author_pays", value)

@author_pays.deleter
def author_pays(self):
    """Remove the ``author_pays`` field."""
    self._delete("author_pays")
@property
def country(self):
    """The ``country`` field."""
    return self._get_single("country")

@country.setter
def country(self, value):
    """Store ``value`` as the ``country``."""
    self._set_with_struct("country", value)

def country_name(self):
    """
    Look the stored country up with ``datasets.get_country_name``.

    :return: the looked-up name, or None when no country is set
    """
    if self.country is None:
        return None
    return datasets.get_country_name(self.country)
@property
def publisher(self):
    """The ``publisher`` field."""
    return self._get_single("publisher")

@publisher.setter
def publisher(self, value):
    """Store ``value`` as the ``publisher``."""
    self._set_with_struct("publisher", value)

@property
def provider(self):
    """The ``provider`` field."""
    return self._get_single("provider")

@provider.setter
def provider(self, value):
    """Store ``value`` as the ``provider``."""
    self._set_with_struct("provider", value)

@property
def institution(self):
    """The ``institution`` field."""
    return self._get_single("institution")

@institution.setter
def institution(self, value):
    """Store ``value`` as the ``institution``."""
    self._set_with_struct("institution", value)
@property
def active(self):
    """The ``active`` flag; True when it has never been set."""
    return self._get_single("active", default=True)

@active.setter
def active(self, value):
    """Store ``value`` as the ``active`` flag."""
    self._set_with_struct("active", value)
@property
def replaces(self):
    """The list stored at ``replaces``."""
    return self._get_list("replaces")

@replaces.setter
def replaces(self, value):
    """Replace the ``replaces`` list with ``value``."""
    self._set_with_struct("replaces", value)

@replaces.deleter
def replaces(self):
    """Remove the ``replaces`` field."""
    self._delete("replaces")

def add_replaces(self, val):
    """Append ``val`` to the ``replaces`` list."""
    self._add_to_list_with_struct("replaces", val)
@property
def is_replaced_by(self):
    """The list stored at ``is_replaced_by``."""
    return self._get_list("is_replaced_by")

@is_replaced_by.setter
def is_replaced_by(self, value):
    """Replace the ``is_replaced_by`` list with ``value``."""
    self._set_with_struct("is_replaced_by", value)

@is_replaced_by.deleter
def is_replaced_by(self):
    """Remove the ``is_replaced_by`` field."""
    self._delete("is_replaced_by")

def add_is_replaced_by(self, val):
    """Append ``val`` to the ``is_replaced_by`` list."""
    self._add_to_list_with_struct("is_replaced_by", val)
@property
def discontinued_date(self):
    """The stored ``discontinued_date`` value."""
    return self._get_single("discontinued_date")

@discontinued_date.setter
def discontinued_date(self, value):
    """Store ``value`` as the ``discontinued_date``."""
    self._set_with_struct("discontinued_date", value)

@discontinued_date.deleter
def discontinued_date(self):
    """Remove the ``discontinued_date`` field."""
    self._delete("discontinued_date")

@property
def discontinued_datestamp(self):
    """``discontinued_date`` coerced with ``dataobj.to_datestamp()``."""
    coerce_fn = dataobj.to_datestamp()
    return self._get_single("discontinued_date", coerce=coerce_fn)
889 ########################################################
890 # journal-specific complex part getters and setters
@property
def language(self):
    """The list stored at ``language``."""
    return self._get_list("language")

def language_name(self):
    """
    The stored languages converted with ``datasets.name_for_lang``.

    :return: de-duplicated list of names, each passed through ``dataobj.to_unicode()``
    """
    names = [datasets.name_for_lang(code) for code in self.language]
    to_unicode = dataobj.to_unicode()
    return list({to_unicode(name) for name in names})

def set_language(self, language):
    """Replace the ``language`` list."""
    self._set_with_struct("language", language)

def add_language(self, language):
    """Append ``language`` to the ``language`` list."""
    self._add_to_list_with_struct("language", language)
def set_license(self, license_title, license_type, url=None, version=None, open_access=None,
                by=None, sa=None, nc=None, nd=None,
                embedded=None, embedded_example_url=None):
    """
    Set the journal's single license, or delete it when neither a title nor a type is given.

    :param license_title: stored as ``title``
    :param license_type: stored as ``type``
    :param url, version, open_access, embedded, embedded_example_url: stored under the same key when not None
    :param by, sa, nc, nd: stored as ``BY``, ``SA``, ``NC``, ``ND`` when not None
    """
    # FIXME: why is there not a "remove license" function
    if not (license_title or license_type):
        # something wants to delete the license
        self._delete("license")
        return

    optional_fields = (
        ("url", url),
        ("version", version),
        ("open_access", open_access),
        ("BY", by),
        ("SA", sa),
        ("NC", nc),
        ("ND", nd),
        ("embedded", embedded),
        ("embedded_example_url", embedded_example_url),
    )
    lobj = {"title" : license_title, "type" : license_type}
    for key, value in optional_fields:
        if value is not None:
            lobj[key] = value

    # the license field is a list which holds at most this one license
    self._set_with_struct("license", [lobj])
def get_license(self):
    """Return the first entry of the ``license`` list, or None when the list is empty."""
    licenses = self._get_list("license")
    return licenses[0] if len(licenses) > 0 else None

def get_license_type(self):
    """Return the ``type`` of the license, or None when there is no license."""
    lobj = self.get_license()
    if lobj is None:
        return None
    return lobj['type']
@property
def open_access(self):
    """
    The ``open_access`` flag of the journal's license.

    :return: the stored flag; False when the license carries no flag or there is no license at all
    """
    lobj = self.get_license()
    if lobj is None:
        # get_license() returns None when no license is set; calling .get on that
        # raised AttributeError
        return False
    return lobj.get("open_access", False)
def set_open_access(self, open_access):
    """
    Set the ``open_access`` flag on the journal's license, creating a license entry if needed.

    :param open_access: the flag value to store
    """
    existing = self.get_license()
    if existing is None:
        existing = {}
    existing["open_access"] = open_access
    # "license" is a list field (see set_license / get_license), so the entry has to be
    # written back wrapped in a list rather than as a bare dictionary
    self._set_with_struct("license", [existing])
def set_oa_start(self, year=None, *args, **kwargs):
    """
    Set ``oa_start`` to the given year (an empty object when no year is given).

    Volume and Number are deprecated; extra arguments are accepted and ignored.
    """
    oaobj = {} if year is None else {"year": year}
    self._set_with_struct("oa_start", oaobj)

@property
def oa_start(self):
    """The ``oa_start`` object; an empty dict when unset."""
    return self._get_single("oa_start", default={})

def set_oa_end(self, year=None, *args, **kwargs):
    """
    Set ``oa_end`` to the given year (an empty object when no year is given).

    Volume and Number are deprecated; extra arguments are accepted and ignored.
    """
    oaobj = {} if year is None else {"year": year}
    self._set_with_struct("oa_end", oaobj)

@property
def oa_end(self):
    """The ``oa_end`` object; an empty dict when unset."""
    return self._get_single("oa_end", default={})
def set_apc(self, currency, average_price):
    """Store the APC currency and average price under ``apc``."""
    for field, value in (("apc.currency", currency), ("apc.average_price", average_price)):
        self._set_with_struct(field, value)

@property
def apc_url(self):
    """The ``apc_url`` field."""
    return self._get_single("apc_url")

@apc_url.setter
def apc_url(self, value):
    """Store ``value`` as the ``apc_url``."""
    self._set_with_struct("apc_url", value)

@property
def apc(self):
    """The ``apc`` object; an empty dict when unset."""
    return self._get_single("apc", default={})
def set_submission_charges(self, currency, average_price):
    """Store the submission charge currency and average price under ``submission_charges``."""
    fields = (
        ("submission_charges.currency", currency),
        ("submission_charges.average_price", average_price),
    )
    for field, value in fields:
        self._set_with_struct(field, value)

@property
def submission_charges_url(self):
    """The ``submission_charges_url`` field."""
    return self._get_single("submission_charges_url")

@submission_charges_url.setter
def submission_charges_url(self, value):
    """Store ``value`` as the ``submission_charges_url``."""
    self._set_with_struct("submission_charges_url", value)

@property
def submission_charges(self):
    """The ``submission_charges`` object; an empty dict when unset."""
    return self._get_single("submission_charges", default={})
# The archiving policy methods work with data stored in this format:
# {
#     "other" : "<other value>",
#     "nat_lib" : "<library value>",
#     "known" : ["<known values>"],
#     "url" : "<url>"
# }
# But they need to receive and expose data in the original external form:
# {
#     "policy" : [
#         "<known policy type (e.g. LOCKSS)>",
#         ["<policy category>", "<previously unknown policy type>"]
#     ],
#     "url" : "<url to policy information page>"
# }

def set_archiving_policy(self, policies, policy_url):
    """
    Replace the archiving policy, converting from the external form to the stored form.

    :param policies: list whose entries are either a plain policy name, or a two-element list of
        ``[category, value]``.  Only the categories "Other" and "A national library" (matched
        case-insensitively) are stored; pairs with any other category are ignored.
    :param policy_url: stored as ``url`` when not None
    """
    stored = {}
    known_policies = []
    for policy in policies:
        if not isinstance(policy, list):
            known_policies.append(policy)
            continue
        category, value = policy
        category = category.lower()
        if category == "other":
            stored["other"] = value
        elif category == "a national library":
            stored["nat_lib"] = value
    if len(known_policies) > 0:
        stored["known"] = known_policies
    if policy_url is not None:
        stored["url"] = policy_url

    self._set_with_struct("archiving_policy", stored)
def add_archiving_policy(self, policy_name):
    """
    Add one archiving policy, given in the external form.

    :param policy_name: a plain policy name, which is appended to the known policies, or a
        two-element list ``[category, value]``.  Only the categories "Other" and
        "A national library" (matched case-insensitively) are stored; other pairs are ignored.
    """
    if not isinstance(policy_name, list):
        self._add_to_list_with_struct("archiving_policy.known", policy_name)
        return

    category, value = policy_name
    category = category.lower()
    if category == "other":
        self._set_with_struct("archiving_policy.other", value)
    elif category == "a national library":
        self._set_with_struct("archiving_policy.nat_lib", value)
@property
def archiving_policy(self):
    """
    The archiving policy in its external form.

    :return: ``{"policy": [...]}`` plus ``url`` when one is stored.  The policy list holds the
        known policy names, followed by ``["A national library", <value>]`` and
        ``["Other", <value>]`` when those are stored.
    """
    stored = self._get_single("archiving_policy", default={})
    policies = []
    external = {"policy" : policies}
    if "url" in stored:
        external["url"] = stored["url"]
    if "known" in stored:
        policies.extend(stored["known"])
    if "nat_lib" in stored:
        policies.append(["A national library", stored["nat_lib"]])
    if "other" in stored:
        policies.append(["Other", stored["other"]])
    return external
@property
def flattened_archiving_policies(self):
    """
    The archiving policies as a flat list of strings.

    :return: the known policy names, followed by ``"A national library: <value>"`` and
        ``"Other: <value>"`` when those are stored
    """
    stored = self._get_single("archiving_policy", default={})
    flattened = []
    if "known" in stored:
        flattened.extend(stored["known"])
    if "nat_lib" in stored:
        flattened.append("A national library: " + stored["nat_lib"])
    if "other" in stored:
        flattened.append("Other: " + stored["other"])

    return flattened
def set_editorial_review(self, process, review_url):
    """
    Record the editorial review process and the url describing it.

    :param process: value written to ``editorial_review.process``
    :param review_url: value written to ``editorial_review.url``
    """
    updates = (
        ("editorial_review.process", process),
        ("editorial_review.url", review_url),
    )
    for path, value in updates:
        self._set_with_struct(path, value)
@property
def editorial_review(self):
    """
    The stored ``editorial_review`` object, read via ``_get_single`` with
    an empty dict supplied as the default.
    """
    review = self._get_single("editorial_review", default={})
    return review
def set_plagiarism_detection(self, url, has_detection=True):
    """
    Record whether plagiarism detection is used, and the url describing it.

    :param url: value written to ``plagiarism_detection.url``
    :param has_detection: value written to ``plagiarism_detection.detection``
    """
    updates = (
        ("plagiarism_detection.detection", has_detection),
        ("plagiarism_detection.url", url),
    )
    for path, value in updates:
        self._set_with_struct(path, value)
@property
def plagiarism_detection(self):
    """
    The stored ``plagiarism_detection`` object, read via ``_get_single``
    with an empty dict supplied as the default.
    """
    detection = self._get_single("plagiarism_detection", default={})
    return detection
def set_article_statistics(self, url, has_statistics=True):
    """
    Record whether article statistics are provided, and the url for them.

    :param url: value written to ``article_statistics.url``
    :param has_statistics: value written to ``article_statistics.statistics``
    """
    updates = (
        ("article_statistics.statistics", has_statistics),
        ("article_statistics.url", url),
    )
    for path, value in updates:
        self._set_with_struct(path, value)
@property
def article_statistics(self):
    """
    The stored ``article_statistics`` object, read via ``_get_single``
    with an empty dict supplied as the default.
    """
    statistics = self._get_single("article_statistics", default={})
    return statistics
@property
def deposit_policy(self):
    """
    The deposit policies, read via ``_get_list`` from ``deposit_policy``.
    """
    policies = self._get_list("deposit_policy")
    return policies

@deposit_policy.setter
def deposit_policy(self, policies):
    """
    Replace the deposit policies with the supplied value.

    :param policies: value written to ``deposit_policy``
    """
    self._set_with_struct("deposit_policy", policies)
def add_deposit_policy(self, policy):
    """
    Add one entry to the deposit policies.

    :param policy: value passed to ``_add_to_list_with_struct`` for the
        ``deposit_policy`` list
    """
    target = "deposit_policy"
    self._add_to_list_with_struct(target, policy)
def set_author_copyright(self, url, holds_copyright=True):
    """
    Record whether authors hold copyright, and the url describing it.

    :param url: value written to ``author_copyright.url``
    :param holds_copyright: value written to ``author_copyright.copyright``
    """
    updates = (
        ("author_copyright.copyright", holds_copyright),
        ("author_copyright.url", url),
    )
    for path, value in updates:
        self._set_with_struct(path, value)
@property
def author_copyright(self):
    """
    The stored ``author_copyright`` object, read via ``_get_single`` with
    an empty dict supplied as the default.
    """
    copyright_info = self._get_single("author_copyright", default={})
    return copyright_info
def set_author_publishing_rights(self, url, holds_rights=True):
    """
    Record whether authors hold publishing rights, and the url describing it.

    :param url: value written to ``author_publishing_rights.url``
    :param holds_rights: value written to
        ``author_publishing_rights.publishing_rights``
    """
    updates = (
        ("author_publishing_rights.publishing_rights", holds_rights),
        ("author_publishing_rights.url", url),
    )
    for path, value in updates:
        self._set_with_struct(path, value)
@property
def author_publishing_rights(self):
    """
    The stored ``author_publishing_rights`` object, read via ``_get_single``
    with an empty dict supplied as the default.
    """
    rights = self._get_single("author_publishing_rights", default={})
    return rights
@property
def allows_fulltext_indexing(self):
    """
    The value stored under ``allows_fulltext_indexing``, read via
    ``_get_single``.
    """
    allowed = self._get_single("allows_fulltext_indexing")
    return allowed

@allows_fulltext_indexing.setter
def allows_fulltext_indexing(self, allows):
    """
    Store the full-text indexing flag.

    :param allows: value written to ``allows_fulltext_indexing``
    """
    self._set_with_struct("allows_fulltext_indexing", allows)
@property
def persistent_identifier_scheme(self):
    """
    The persistent identifier schemes, read via ``_get_list`` from
    ``persistent_identifier_scheme``.
    """
    schemes = self._get_list("persistent_identifier_scheme")
    return schemes

@persistent_identifier_scheme.setter
def persistent_identifier_scheme(self, schemes):
    """
    Replace the persistent identifier schemes with the supplied value.

    :param schemes: value written to ``persistent_identifier_scheme``
    """
    self._set_with_struct("persistent_identifier_scheme", schemes)
def add_persistent_identifier_scheme(self, scheme):
    """
    Add one entry to the persistent identifier schemes.

    :param scheme: value passed to ``_add_to_list_with_struct`` for the
        ``persistent_identifier_scheme`` list
    """
    target = "persistent_identifier_scheme"
    self._add_to_list_with_struct(target, scheme)
@property
def format(self):
    """
    The formats, read via ``_get_list`` from ``format``.
    """
    formats = self._get_list("format")
    return formats

@format.setter
def format(self, form):
    """
    Replace the formats with the supplied value.

    :param form: value written to ``format``
    """
    self._set_with_struct("format", form)
def add_format(self, form):
    """
    Add one entry to the formats.

    :param form: value passed to ``_add_to_list_with_struct`` for the
        ``format`` list
    """
    target = "format"
    self._add_to_list_with_struct(target, form)
@property
def publication_time(self):
    """
    The value stored under ``publication_time``, read via ``_get_single``.
    """
    stored_time = self._get_single("publication_time")
    return stored_time

@publication_time.setter
def publication_time(self, weeks):
    """
    Store the publication time.

    :param weeks: value written to ``publication_time``
    """
    self._set_with_struct("publication_time", weeks)
# To help with the ToC: a journal is referred to by its E-ISSN where it
# has one, and by its P-ISSN otherwise
def get_preferred_issn(self):
    """
    Return the E-ISSN identifier if ``get_one_identifier`` yields a truthy
    value for it, otherwise whatever it yields for the P-ISSN.
    """
    electronic = self.get_one_identifier(self.E_ISSN)
    if electronic:
        return electronic
    return self.get_one_identifier(self.P_ISSN)
# Struct definition for the journal record: top-level fields, plus the
# "admin" and "index" sub-objects with their own fields, lists and structs.
JOURNAL_STRUCT = {
    "fields": {
        "id": {"coerce": "unicode"},
        "created_date": {"coerce": "utcdatetime"},
        "last_updated": {"coerce": "utcdatetime"},
        "last_reapplication": {"coerce": "utcdatetime"},
        "last_manual_update": {"coerce": "utcdatetime"},
    },
    "objects": ["admin", "index"],

    "structs": {
        # administrative data about the record
        "admin": {
            "fields": {
                "in_doaj": {"coerce": "bool"},
                "ticked": {"coerce": "bool"},
                "seal": {"coerce": "bool"},
                "bulk_upload": {"coerce": "unicode"},
                "owner": {"coerce": "unicode"},
                "editor_group": {"coerce": "unicode"},
                "editor": {"coerce": "unicode"},
                "current_application": {"coerce": "unicode"},
            },
            "lists": {
                "contact": {"contains": "object"},
                "notes": {"contains": "object"},
                "related_applications": {"contains": "object"},
            },
            "structs": {
                "contact": {
                    "fields": {
                        "email": {"coerce": "unicode"},
                        "name": {"coerce": "unicode"},
                    },
                },
                "notes": {
                    "fields": {
                        "note": {"coerce": "unicode"},
                        "date": {"coerce": "utcdatetime"},
                    },
                },
                "related_applications": {
                    "fields": {
                        "application_id": {"coerce": "unicode"},
                        "date_accepted": {"coerce": "utcdatetime"},
                        "status": {"coerce": "unicode"},
                    },
                },
            },
        },
        # values held under the "index" object
        "index": {
            "fields": {
                "country": {"coerce": "unicode"},
                "homepage_url": {"coerce": "unicode"},
                "waiver_policy_url": {"coerce": "unicode"},
                "editorial_board_url": {"coerce": "unicode"},
                "aims_scope_url": {"coerce": "unicode"},
                "author_instructions_url": {"coerce": "unicode"},
                "oa_statement_url": {"coerce": "unicode"},
                "has_apc": {"coerce": "unicode"},
                "has_seal": {"coerce": "unicode"},
                "unpunctitle": {"coerce": "unicode"},
                "asciiunpunctitle": {"coerce": "unicode"},
                "continued": {"coerce": "unicode"},
                "has_editor_group": {"coerce": "unicode"},
                "has_editor": {"coerce": "unicode"},
                "publisher_ac": {"coerce": "unicode"},
                "institution_ac": {"coerce": "unicode"},
                "provider_ac": {"coerce": "unicode"},
            },
            "lists": {
                "issn": {"contains": "field", "coerce": "unicode"},
                "title": {"contains": "field", "coerce": "unicode"},
                "subject": {"contains": "field", "coerce": "unicode"},
                "schema_subject": {"contains": "field", "coerce": "unicode"},
                "classification": {"contains": "field", "coerce": "unicode"},
                "language": {"contains": "field", "coerce": "unicode"},
                "license": {"contains": "field", "coerce": "unicode"},
                "classification_paths": {"contains": "field", "coerce": "unicode"},
                "schema_code": {"contains": "field", "coerce": "unicode"},
                "publisher": {"contains": "field", "coerce": "unicode"},
            },
        },
    },
}
# Options used alongside the struct when producing the index mapping.
# The coerce defaults come from the application configuration; the
# "exceptions" entry overrides the mapping for the admin note text.
MAPPING_OPTS = {
    "dynamic": None,
    "coerces": app.config["DATAOBJ_TO_MAPPING_DEFAULTS"],
    "exceptions": {
        "admin.notes.note": {
            "type": "text",
            "index": False,
            # "include_in_all": False
            # (removed in es6) FIXME: do we need to look at copy_to for the mapping?
        },
    },
}
1292########################################################
1293## Data Access Queries
class JournalQuery(object):
    """
    Wrapper around the kinds of queries we want to do against the journal type
    """

    # template for ISSN lookups; the "<issn>" placeholder is replaced with the
    # list of ISSNs in find_by_issn
    issn_query = {
        "track_total_hits": True,
        "query": {
            "bool": {
                "must": [
                    {"terms": {"index.issn.exact": "<issn>"}},
                ],
            },
        },
    }

    # template matching every record with admin.in_doaj set to True
    all_doaj = {
        "track_total_hits": True,
        "query": {
            "bool": {
                "must": [
                    {"term": {"admin.in_doaj": True}},
                ],
            },
        },
    }

    _minified_fields = ["id", "bibjson.title", "last_updated"]

    def __init__(self, minified=False, sort_by_title=False):
        """
        :param minified: if True, all_in_doaj() restricts "fields" to the minified set
        :param sort_by_title: if True, all_in_doaj() sorts ascending by exact title
        """
        self.query = None
        self.minified = minified
        self.sort_by_title = sort_by_title

    def find_by_issn(self, issns, in_doaj=None, max=10):
        """
        Build the ISSN lookup query and store it on self.query (nothing is returned).

        :param issns: value placed in the terms clause on index.issn.exact
        :param in_doaj: when not None, adds a term clause on admin.in_doaj
        :param max: value used as the query "size"
        """
        built = deepcopy(self.issn_query)
        clauses = built["query"]["bool"]["must"]
        clauses[0]["terms"]["index.issn.exact"] = issns
        if in_doaj is not None:
            clauses.append({"term": {"admin.in_doaj": in_doaj}})
        built["size"] = max
        self.query = built

    def all_in_doaj(self):
        """
        Return a copy of the all_doaj template, with the optional "fields"
        and "sort" entries applied according to the constructor flags.
        """
        built = deepcopy(self.all_doaj)
        if self.minified:
            built["fields"] = self._minified_fields
        if self.sort_by_title:
            built["sort"] = [{"bibjson.title.exact": {"order": "asc"}}]
        return built
class IssnQuery(object):
    """
    Aggregation query over index.issn.exact for the records of one owner
    """

    # template; the owner placeholder is replaced in the constructor
    base_query = {
        "track_total_hits": True,
        "query": {
            "term": {"admin.owner.exact": "<owner id here>"},
        },
        "size": 0,
        "aggs": {
            "issns": {
                "terms": {
                    "field": "index.issn.exact",
                    "size": 10000,
                    "order": {"_key": "asc"},
                },
            },
        },
    }

    def __init__(self, owner):
        """
        :param owner: value used in the term clause on admin.owner.exact
        """
        prepared = deepcopy(self.base_query)
        prepared["query"]["term"]["admin.owner.exact"] = owner
        self._query = prepared

    def query(self):
        """
        Return the query built in the constructor (the same object each call).
        """
        return self._query
class PublisherQuery(object):
    """
    Term query on the publisher name, against either index.publisher.exact
    or index.publisher
    """

    exact_query = {
        "track_total_hits": True,
        "query": {
            "term": {"index.publisher.exact": "<publisher name here>"},
        },
        "size": 10000,
    }

    inexact_query = {
        "track_total_hits": True,
        "query": {
            "term": {"index.publisher": "<publisher name here>"},
        },
        "size": 10000,
    }

    def __init__(self, publisher, exact=True):
        """
        :param publisher: the publisher name to search for
        :param exact: selects the exact_query template when truthy,
            otherwise the inexact_query template
        """
        self.publisher = publisher
        self.exact = exact

    def query(self):
        """
        Return a fresh query dict for the publisher. The inexact form
        lower-cases the publisher name before using it as the term.
        """
        if self.exact:
            built = deepcopy(self.exact_query)
            field, value = "index.publisher.exact", self.publisher
        else:
            built = deepcopy(self.inexact_query)
            field, value = "index.publisher", self.publisher.lower()
        built["query"]["term"][field] = value
        return built
class TitleQuery(object):
    """
    Term query on index.title.exact
    """

    # template; the title placeholder is replaced in query()
    base_query = {
        "track_total_hits": True,
        "query": {
            "term": {"index.title.exact": "<title here>"},
        },
        "size": 10000,
    }

    def __init__(self, title):
        """
        :param title: value used in the term clause on index.title.exact
        """
        self.title = title

    def query(self):
        """
        Return a fresh copy of the template with the title filled in.
        """
        built = deepcopy(self.base_query)
        term = built["query"]["term"]
        term["index.title.exact"] = self.title
        return built
class ContinuationQuery(object):
    """
    Terms query on index.issn.exact for a set of ISSNs
    """

    def __init__(self, issns):
        """
        :param issns: value used in the terms clause on index.issn.exact
        """
        self.issns = issns

    def query(self):
        """
        Return a newly built query dict matching any of the ISSNs.
        """
        issn_clause = {"terms": {"index.issn.exact": self.issns}}
        return {
            "track_total_hits": True,
            "query": {"bool": {"must": [issn_clause]}},
            "size": 10000,
        }
class ArticleStatsQuery(object):
    """
    Query for the single most recently created record, with admin.in_doaj
    True, that matches any of the supplied ISSNs. Only created_date is
    requested in the returned source.
    """

    def __init__(self, issns):
        """
        :param issns: value used in the terms clause on index.issn.exact
        """
        self.issns = issns

    def query(self):
        """
        Return a newly built query dict.

        The source filter uses the documented "includes" key; the singular
        "include" spelling is a deprecated alias in Elasticsearch.
        """
        return {
            "track_total_hits": True,
            "query": {
                "bool": {
                    "must": [
                        {"terms": {"index.issn.exact": self.issns}},
                        {"term": {"admin.in_doaj": True}},
                    ],
                },
            },
            # newest first, and only one hit wanted
            "size": 1,
            "_source": {
                "includes": ["created_date"],
            },
            "sort": [{"created_date": {"order": "desc"}}],
        }