Coverage for portality / models / v1 / journal.py: 0%
899 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-04 09:41 +0100
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-04 09:41 +0100
1from portality.dao import DomainObject
2from portality.core import app
3from portality.lib.dates import DEFAULT_TIMESTAMP_VAL
4from portality.models.v1.bibjson import GenericBibJSON
5from portality.models.v1 import shared_structs
6from portality.lib import dataobj, es_data_mapping, dates
7from portality import datasets
9from copy import deepcopy
10from datetime import datetime
12import string
13from unidecode import unidecode
class ContinuationException(Exception):
    """Raised when a journal continuation cannot be created from the arguments supplied."""
class JournalLikeObject(dataobj.DataObj, DomainObject):
    """
    Behaviour shared by journal-shaped records.

    Provides ISSN-based lookups, accessors for the top-level and ``admin.*``
    fields of the record, and generation of the ``index`` section from the
    record's bibjson.
    """

    @classmethod
    def find_by_issn(cls, issns, in_doaj=None, max=10):
        """
        Find records carrying any of the given ISSNs.

        :param issns: a single ISSN or a list of ISSNs
        :param in_doaj: passed through to ``JournalQuery.find_by_issn``
        :param max: passed through to ``JournalQuery.find_by_issn``
        :return: list of ``cls`` instances built from each hit's ``_source``
        """
        if not isinstance(issns, list):
            issns = [issns]
        q = JournalQuery()
        q.find_by_issn(issns, in_doaj=in_doaj, max=max)
        result = cls.query(q=q.query)
        # create an array of objects, using cls rather than Journal, which means
        # subclasses can use it too (i.e. Suggestion)
        records = [cls(**r.get("_source")) for r in result.get("hits", {}).get("hits", [])]
        return records

    @classmethod
    def issns_by_owner(cls, owner):
        """
        List the ISSNs reported by the ``issns`` aggregation of ``IssnQuery(owner)``.

        :param owner: owner identifier handed to ``IssnQuery``
        :return: list of bucket keys (one per ISSN)
        """
        q = IssnQuery(owner)
        res = cls.query(q=q.query())
        buckets = res.get("aggregations", {}).get("issns", {}).get("buckets", [])
        # aggregation buckets expose their value under "key" (singular)
        issns = [term.get("key") for term in buckets]
        return issns

    @classmethod
    def issns_by_query(cls, query):
        """
        Collect ``known_issns()`` from every record matched by ``query``.

        :param query: query handed to ``cls.iterate``
        :return: flat list of ISSNs (duplicates are not removed)
        """
        issns = []
        for j in cls.iterate(query):
            issns += j.known_issns()
        return issns

    ############################################
    ## base property methods

    @property
    def id(self):
        """The record's ``id`` field."""
        return self._get_single("id")

    def set_id(self, id=None):
        """Set the ``id`` field; when ``id`` is None a new one comes from ``self.makeid()``."""
        if id is None:
            id = self.makeid()
        self._set_with_struct("id", id)

    def set_created(self, date=None):
        """Set ``created_date``; defaults to ``dates.now_str()``."""
        if date is None:
            date = dates.now_str()
        self._set_with_struct("created_date", date)

    @property
    def created_date(self):
        """The stored ``created_date`` value."""
        return self._get_single("created_date")

    @property
    def created_timestamp(self):
        """``created_date`` coerced with ``dataobj.to_datestamp()``."""
        return self._get_single("created_date", coerce=dataobj.to_datestamp())

    def set_last_updated(self, date=None):
        """Set ``last_updated``; defaults to ``dates.now_str()``."""
        if date is None:
            date = dates.now_str()
        self._set_with_struct("last_updated", date)

    @property
    def last_updated(self):
        """The stored ``last_updated`` value."""
        return self._get_single("last_updated")

    @property
    def last_updated_timestamp(self):
        """``last_updated`` coerced with ``dataobj.to_datestamp()``."""
        return self._get_single("last_updated", coerce=dataobj.to_datestamp())

    def bibjson(self):
        """
        Return a ``JournalBibJSON`` wrapper around this record's ``bibjson`` section.

        An empty ``bibjson`` section is created first if none is present.
        """
        bj = self._get_single("bibjson")
        if bj is None:
            self._set_single("bibjson", {})
            bj = self._get_single("bibjson")
        return JournalBibJSON(bj)

    def set_bibjson(self, bibjson):
        """Replace the ``bibjson`` section; accepts a ``JournalBibJSON`` or its raw data."""
        bibjson = bibjson.data if isinstance(bibjson, JournalBibJSON) else bibjson
        self._set_with_struct("bibjson", bibjson)

    def set_last_manual_update(self, date=None):
        """Set ``last_manual_update``; defaults to ``dates.now_str()``."""
        if date is None:
            date = dates.now_str()
        self._set_with_struct("last_manual_update", date)

    @property
    def last_manual_update(self):
        """The stored ``last_manual_update`` value."""
        return self._get_single("last_manual_update")

    @property
    def last_manual_update_timestamp(self):
        """``last_manual_update`` coerced with ``dataobj.to_datestamp()``."""
        return self._get_single("last_manual_update", coerce=dataobj.to_datestamp())

    def has_been_manually_updated(self):
        """
        Report whether a manual update later than the Unix epoch has been recorded.

        :return: False when no ``last_manual_update`` is stored
        """
        stamp = self.last_manual_update_timestamp
        if stamp is None:
            # nothing recorded; comparing None with a datetime would raise TypeError
            return False
        return stamp > datetime.utcfromtimestamp(0)

    def contacts(self):
        """Return the list stored at ``admin.contact``."""
        # admin.contact is written with _add_to_list_with_struct, so read it as a list
        return self._get_list("admin.contact")

    def get_latest_contact_name(self):
        """Return the ``name`` of the last contact, or ``""`` when there is none."""
        contacts = self.contacts()
        if not contacts:
            return ""
        return contacts[-1].get("name", "")

    def get_latest_contact_email(self):
        """Return the ``email`` of the last contact, or ``""`` when there is none."""
        contacts = self.contacts()
        if not contacts:
            return ""
        return contacts[-1].get("email", "")

    def add_contact(self, name, email):
        """Append a ``{"name", "email"}`` entry to ``admin.contact``."""
        self._add_to_list_with_struct("admin.contact", {"name" : name, "email" : email})

    def remove_contacts(self):
        """Delete ``admin.contact``."""
        self._delete("admin.contact")

    def add_note(self, note, date=None):
        """
        Add a note to ``admin.notes``.

        Any existing entry matching the same date and text is removed first.

        :param note: note text
        :param date: note date; defaults to ``dates.now_str()``
        """
        if date is None:
            date = dates.now_str()
        obj = {"date" : date, "note" : note}
        self._delete_from_list("admin.notes", matchsub=obj)
        self._add_to_list_with_struct("admin.notes", obj)

    def remove_note(self, note):
        """Remove entries from ``admin.notes`` using ``note`` as the ``matchsub`` pattern."""
        self._delete_from_list("admin.notes", matchsub=note)

    def set_notes(self, notes):
        """Replace ``admin.notes`` with ``notes``."""
        self._set_with_struct("admin.notes", notes)

    def remove_notes(self):
        """Delete ``admin.notes``."""
        self._delete("admin.notes")

    @property
    def notes(self):
        """The list stored at ``admin.notes``."""
        return self._get_list("admin.notes")

    @property
    def ordered_notes(self):
        """
        Notes ordered by ``date`` descending.

        Notes sharing the same date are emitted in reverse of their stored order.
        """
        clusters = {}
        for note in self.notes:
            clusters.setdefault(note["date"], []).append(note)
        ordered = []
        for key in sorted(clusters, reverse=True):
            ordered += reversed(clusters[key])
        return ordered

    @property
    def owner(self):
        """The value stored at ``admin.owner``."""
        return self._get_single("admin.owner")

    def set_owner(self, owner):
        """Set ``admin.owner``."""
        self._set_with_struct("admin.owner", owner)

    def remove_owner(self):
        """Delete ``admin.owner``."""
        self._delete("admin.owner")

    @property
    def editor_group(self):
        """The value stored at ``admin.editor_group``."""
        return self._get_single("admin.editor_group")

    def set_editor_group(self, eg):
        """Set ``admin.editor_group``."""
        self._set_with_struct("admin.editor_group", eg)

    def remove_editor_group(self):
        """Delete ``admin.editor_group``."""
        self._delete("admin.editor_group")

    @property
    def editor(self):
        """The value stored at ``admin.editor``."""
        return self._get_single("admin.editor")

    def set_editor(self, ed):
        """Set ``admin.editor``."""
        self._set_with_struct("admin.editor", ed)

    def remove_editor(self):
        """Delete ``admin.editor``."""
        self._delete('admin.editor')

    def known_issns(self):
        """
        DEPRECATED

        all issns this journal is known by

        This used to mean "all issns the journal has ever been known by", but that definition has changed since
        continuations have been separated from the single journal object model.

        Now this is just a proxy for self.bibjson().issns()
        """
        return self.bibjson().issns()

    def has_seal(self):
        """Return ``admin.seal`` (False when unset)."""
        return self._get_single("admin.seal", default=False)

    def set_seal(self, value):
        """Set ``admin.seal``."""
        self._set_with_struct("admin.seal", value)

    @property
    def bulk_upload_id(self):
        """The value stored at ``admin.bulk_upload``."""
        return self._get_single("admin.bulk_upload")

    def set_bulk_upload_id(self, bulk_upload_id):
        """Set ``admin.bulk_upload``."""
        self._set_with_struct("admin.bulk_upload", bulk_upload_id)

    ######################################################
    ## internal utility methods

    def _generate_index(self):
        """
        Rebuild the ``index`` section of the record from its bibjson and admin data.

        List-valued fields are de-duplicated through ``set``, so their order is
        not preserved. Empty lists and None values are left out of the index;
        ``has_seal``, ``continued``, ``has_editor_group`` and ``has_editor`` are
        always written as "Yes"/"No".
        """
        # the index fields we are going to generate
        issns = []
        titles = []
        subjects = []
        schema_subjects = []
        schema_codes = []
        classification = []
        license = []
        publisher = []
        urls = {}
        unpunctitle = None
        asciiunpunctitle = None

        # the place we're going to get those fields from
        cbib = self.bibjson()

        # get the issns out of the current bibjson
        issns += cbib.get_identifiers(cbib.P_ISSN)
        issns += cbib.get_identifiers(cbib.E_ISSN)

        # get the title out of the current bibjson
        if cbib.title is not None:
            titles.append(cbib.title)

        # get the subjects and concatenate them with their schemes from the current bibjson
        for subs in cbib.subjects():
            scheme = subs.get("scheme")
            term = subs.get("term")
            subjects.append(term)
            schema_subjects.append(scheme + ":" + term)
            classification.append(term)
            if "code" in subs:
                schema_codes.append(scheme + ":" + subs.get("code"))

        # add the keywords to the non-schema subjects (but not the classification)
        subjects += cbib.keywords

        # get the bibjson object to convert the languages to the english form
        langs = cbib.language_name()

        # get the english name of the country
        country = cbib.country_name()

        # get the title of the license
        lic = cbib.get_license()
        if lic is not None:
            license.append(lic.get("title"))

        # copy the publisher/institution
        if cbib.publisher:
            publisher.append(cbib.publisher)
        if cbib.institution:
            publisher.append(cbib.institution)

        # extract and convert all of the urls by their type
        for link in cbib.get_urls():
            lt = link.get("type")
            if lt is not None:
                urls[lt + "_url"] = link.get("url")

        # deduplicate the lists
        issns = list(set(issns))
        titles = list(set(titles))
        subjects = list(set(subjects))
        schema_subjects = list(set(schema_subjects))
        classification = list(set(classification))
        license = list(set(license))
        publisher = list(set(publisher))
        schema_codes = list(set(schema_codes))

        # determine if the seal is applied
        has_seal = "Yes" if self.has_seal() else "No"

        # get the full classification paths for the subjects
        classification_paths = cbib.lcc_paths()

        # create an unpunctitle
        if cbib.title is not None:
            throwlist = string.punctuation + '\n\t'
            unpunctitle = "".join(c for c in cbib.title if c not in throwlist).strip()
            try:
                asciiunpunctitle = unidecode(unpunctitle)
            except Exception:
                # best effort: fall back to the un-transliterated title, but do not
                # swallow KeyboardInterrupt/SystemExit as a bare except would
                asciiunpunctitle = unpunctitle

        # record if this journal object is a continuation
        continued = "Yes" if (len(cbib.replaces) > 0 or len(cbib.is_replaced_by) > 0) else "No"
        has_editor_group = "Yes" if self.editor_group is not None else "No"
        has_editor = "Yes" if self.editor is not None else "No"

        # build the index part of the object
        index = {}
        if len(issns) > 0:
            index["issn"] = issns
        if len(titles) > 0:
            index["title"] = titles
        if len(subjects) > 0:
            index["subject"] = subjects
        if len(schema_subjects) > 0:
            index["schema_subject"] = schema_subjects
        if len(classification) > 0:
            index["classification"] = classification
        if len(publisher) > 0:
            index["publisher"] = publisher
        if len(license) > 0:
            index["license"] = license
        if len(langs) > 0:
            index["language"] = langs
        if country is not None:
            index["country"] = country
        if len(schema_codes) > 0:
            index["schema_code"] = schema_codes
        # updating with an empty dict is a no-op, so no emptiness check is needed
        index.update(urls)
        # has_seal is always "Yes" or "No", so it is always recorded
        index["has_seal"] = has_seal
        if len(classification_paths) > 0:
            index["classification_paths"] = classification_paths
        if unpunctitle is not None:
            index["unpunctitle"] = unpunctitle
        if asciiunpunctitle is not None:
            index["asciiunpunctitle"] = asciiunpunctitle
        index["continued"] = continued
        index["has_editor_group"] = has_editor_group
        index["has_editor"] = has_editor
        self._set_with_struct("index", index)
class Journal(JournalLikeObject):
    """
    A journal record (``__type__ = "journal"``).

    Adds journal-specific queries, admin accessors, continuation handling,
    snapshotting and the save-time preparation steps on top of
    ``JournalLikeObject``.
    """
    __type__ = "journal"

    def __init__(self, **kwargs):
        """Build a journal from raw record data, or from an ES hit containing ``_source``."""
        # FIXME: hack, to deal with ES integration layer being improperly abstracted
        if "_source" in kwargs:
            kwargs = kwargs["_source"]
        self._add_struct(shared_structs.SHARED_BIBJSON)
        self._add_struct(shared_structs.JOURNAL_BIBJSON_EXTENSION)
        self._add_struct(JOURNAL_STRUCT)
        super(Journal, self).__init__(raw=kwargs)

    #####################################################
    ## Journal-specific data access methods

    @classmethod
    def all_in_doaj(cls, page_size=5000):
        """Iterate (wrapped) over the results of ``JournalQuery().all_in_doaj()``."""
        q = JournalQuery()
        return cls.iterate(q.all_in_doaj(), page_size=page_size, wrap=True)

    @classmethod
    def find_by_publisher(cls, publisher, exact=True):
        """Return ``Journal`` objects for the hits of ``PublisherQuery(publisher, exact)``."""
        q = PublisherQuery(publisher, exact)
        result = cls.query(q=q.query())
        records = [Journal(**r.get("_source")) for r in result.get("hits", {}).get("hits", [])]
        return records

    @classmethod
    def find_by_title(cls, title):
        """Return ``Journal`` objects for the hits of ``TitleQuery(title)``."""
        q = TitleQuery(title)
        result = cls.query(q=q.query())
        records = [Journal(**r.get("_source")) for r in result.get("hits", {}).get("hits", [])]
        return records

    @classmethod
    def delete_selected(cls, query, articles=False, snapshot_journals=True, snapshot_articles=True):
        """
        Delete every journal matched by ``query``.

        :param query: query selecting the journals
        :param articles: when True, first delete the articles carrying those journals' ISSNs
        :param snapshot_journals: when True, snapshot each journal before deletion
        :param snapshot_articles: passed to ``Article.delete_by_issns`` as ``snapshot``
        """
        if articles:
            # list the issns of all the journals
            issns = cls.issns_by_query(query)

            # issue a delete request over all the articles by those issns
            from portality.models import Article
            Article.delete_by_issns(issns, snapshot=snapshot_articles)

        # snapshot the journal record
        if snapshot_journals:
            for j in cls.iterate(query, page_size=1000):
                j.snapshot()

        # finally issue a delete request against the journals
        cls.delete_by_query(query)

    def all_articles(self):
        """Return ``Article.find_by_issns`` for this journal's known ISSNs."""
        from portality.models import Article
        return Article.find_by_issns(self.known_issns())

    def article_stats(self):
        """
        Summarise the articles found by ``ArticleStatsQuery`` for this journal's ISSNs.

        :return: ``{"total": <hit count>, "latest": <created_date of first hit or None>}``
        """
        from portality.models import Article
        q = ArticleStatsQuery(self.known_issns())
        data = Article.query(q=q.query())
        hits = data.get("hits", {})
        total = hits.get("total", {}).get('value', 0)
        latest = None
        if total > 0:
            latest = hits.get("hits", [])[0].get("_source").get("created_date")
        return {
            "total" : total,
            "latest" : latest
        }

    def mappings(self):
        """Create the index mapping from this object's struct and ``MAPPING_OPTS``."""
        return es_data_mapping.create_mapping(self.get_struct(), MAPPING_OPTS)

    ############################################
    ## base property methods

    @property
    def toc_id(self):
        """The E-ISSN if present, otherwise the P-ISSN, otherwise the record id."""
        bibjson = self.bibjson()
        id_ = bibjson.get_one_identifier(bibjson.E_ISSN)
        if not id_:
            id_ = bibjson.get_one_identifier(bibjson.P_ISSN)
        if not id_:
            id_ = self.id
        return id_

    @property
    def last_update_request(self):
        """
        The most recent ``date_accepted`` among the related applications.

        Records without ``date_accepted`` are treated as ``DEFAULT_TIMESTAMP_VAL``.

        :return: a date string, or None when there are no related applications
        """
        related = self.related_applications
        if len(related) == 0:
            return None
        # sorted() returns a new list, so its result must be used; max() picks the
        # latest entry directly without mutating the stored list
        latest = max(related, key=lambda x: x.get("date_accepted", DEFAULT_TIMESTAMP_VAL))
        return latest.get("date_accepted", DEFAULT_TIMESTAMP_VAL)

    ############################################################
    ## revision history methods

    def snapshot(self):
        """
        Save a ``JournalHistory`` copy of the current record.

        The copy records the journal id under ``about`` and omits ``id``,
        ``index``, ``last_updated`` and ``created_date``.
        """
        from portality.models import JournalHistory

        snap = deepcopy(self.data)
        if "id" in snap:
            snap["about"] = snap.pop("id")
        for transient in ("index", "last_updated", "created_date"):
            snap.pop(transient, None)

        hist = JournalHistory(**snap)
        hist.save()

    #######################################################################
    ## Conversion methods

    def make_continuation(self, type, eissn=None, pissn=None, title=None):
        """
        Create, save and return a new journal that is a continuation of this one.

        The new journal starts as a copy of this record with new ISSNs (and
        optionally a new title), is taken out of DOAJ, and is cross-linked with
        this journal. Both records are saved.

        :param type: relationship as seen from THIS journal: ``"replaces"`` or ``"is_replaced_by"``
        :param eissn: electronic ISSN of the continuation
        :param pissn: print ISSN of the continuation
        :param title: title for the continuation; the current title is kept when None
        :return: the saved continuation ``Journal``
        :raises ContinuationException: for an unknown ``type``, or when both ISSNs are None
        """
        # check that the type is one we know. Must be either 'replaces' or 'is_replaced_by'
        if type not in ["replaces", "is_replaced_by"]:
            raise ContinuationException("type must be one of 'replaces' or 'is_replaced_by'")

        if eissn is None and pissn is None:
            raise ContinuationException("You must create a continuation with at least one issn")

        # take a copy of the raw data for this journal, and the issns for this journal
        raw_cont = deepcopy(self.data)
        bibjson = self.bibjson()
        issns = bibjson.issns()
        cissns = []

        # make a new instance of the journal - this will be our continuation.
        # pop() rather than del, so a record that has never been saved (and so
        # lacks these fields) does not raise KeyError
        for key in ("id", "created_date", "last_updated"):
            raw_cont.pop(key, None)
        j = Journal(**raw_cont)

        # ensure that the journal is NOT in doaj. That will be for the admin to decide
        j.set_in_doaj(False)

        # get a copy of the continuation's bibjson, then remove the existing issns
        cbj = j.bibjson()
        cbj.remove_identifiers(cbj.E_ISSN)
        cbj.remove_identifiers(cbj.P_ISSN)

        # also remove any existing continuation information
        del cbj.replaces
        del cbj.is_replaced_by
        del cbj.discontinued_date

        # now write the new identifiers
        if eissn is not None and eissn != "":
            cissns.append(eissn)
            cbj.add_identifier(cbj.E_ISSN, eissn)
        if pissn is not None and pissn != "":
            cissns.append(pissn)
            cbj.add_identifier(cbj.P_ISSN, pissn)

        # update the title
        if title is not None:
            cbj.title = title

        # now add the issns of the original journal in the appropriate field
        #
        # This is a bit confusing - because we're asking this of a Journal object, the relationship type we're asking
        # for relates to this journal, not to the continuation we are creating. This means that when setting the
        # new continuations properties, we have to do the opposite to what we do to the journal's properties
        #
        # "replaces" means that the current journal replaces the new continuation
        if type == "replaces":
            bibjson.replaces = cissns
            cbj.is_replaced_by = issns

        # "is_replaced_by" means that the current journal is replaced by the new continuation
        elif type == "is_replaced_by":
            bibjson.is_replaced_by = cissns
            cbj.replaces = issns

        # save this journal
        self.save()

        # save the continuation, and return a copy to the caller
        j.save()
        return j

    ####################################################
    ## admin data methods

    def is_in_doaj(self):
        """Return ``admin.in_doaj`` (False when unset)."""
        return self._get_single("admin.in_doaj", default=False)

    def set_in_doaj(self, value):
        """Set ``admin.in_doaj``."""
        self._set_with_struct("admin.in_doaj", value)

    @property
    def current_application(self):
        """The value stored at ``admin.current_application``."""
        return self._get_single("admin.current_application")

    def set_current_application(self, application_id):
        """Set ``admin.current_application``."""
        self._set_with_struct("admin.current_application", application_id)

    def remove_current_application(self):
        """Delete ``admin.current_application``."""
        self._delete("admin.current_application")

    @property
    def related_applications(self):
        """The list stored at ``admin.related_applications``."""
        return self._get_list("admin.related_applications")

    def add_related_application(self, application_id, date_accepted=None, status=None):
        """
        Record a related application, replacing any existing entry for the same id.

        :param application_id: id of the application
        :param date_accepted: stored as ``date_accepted`` when not None
        :param status: stored as ``status`` when not None
        """
        obj = {"application_id" : application_id}
        # match on the id alone so an older entry for this application is dropped
        self._delete_from_list("admin.related_applications", matchsub=obj)
        if date_accepted is not None:
            obj["date_accepted"] = date_accepted
        if status is not None:
            obj["status"] = status
        self._add_to_list_with_struct("admin.related_applications", obj)

    def set_related_applications(self, related_applications_records):
        """Replace ``admin.related_applications``."""
        self._set_with_struct("admin.related_applications", related_applications_records)

    def remove_related_applications(self):
        """Delete ``admin.related_applications``."""
        self._delete("admin.related_applications")

    def related_application_record(self, application_id):
        """Return the related-application entry with this ``application_id``, or None."""
        for record in self.related_applications:
            if record.get("application_id") == application_id:
                return record
        return None

    def latest_related_application_id(self):
        """
        Return the ``application_id`` of the related application with the latest ``date_accepted``.

        Records without ``date_accepted`` are treated as ``DEFAULT_TIMESTAMP_VAL``.

        :return: an application id, or None when there are no related applications
        """
        related = self.related_applications
        if len(related) == 0:
            return None
        # the result of sorted() was previously discarded, which meant the first
        # stored record was always returned; select the latest explicitly
        latest = max(related, key=lambda x: x.get("date_accepted", DEFAULT_TIMESTAMP_VAL))
        return latest.get("application_id")

    def is_ticked(self):
        """Return ``admin.ticked`` (False when unset)."""
        return self._get_single("admin.ticked", default=False)

    def set_ticked(self, ticked):
        """Set ``admin.ticked``."""
        self._set_with_struct("admin.ticked", ticked)

    ########################################################################
    ## Functions for handling continuations

    def get_future_continuations(self):
        """
        Return the journals found via ``is_replaced_by``, followed recursively.

        :return: directly found journals first, then those found through them
        """
        irb = self.bibjson().is_replaced_by
        q = ContinuationQuery(irb)

        journals = self.q2obj(q=q.query())
        subjournals = []
        for j in journals:
            subjournals += j.get_future_continuations()

        return journals + subjournals

    def get_past_continuations(self):
        """
        Return the journals found via ``replaces``, followed recursively.

        :return: directly found journals first, then those found through them
        """
        replaces = self.bibjson().replaces
        q = ContinuationQuery(replaces)

        journals = self.q2obj(q=q.query())
        subjournals = []
        for j in journals:
            subjournals += j.get_past_continuations()

        return journals + subjournals

    #######################################################################

    #####################################################
    ## operations we can do to the journal

    def calculate_tick(self):
        """
        Recompute ``admin.ticked`` against the ``TICK_THRESHOLD`` config date.

        An unsaved record (no ``created_date``) is ticked when the threshold is
        not in the future. A saved record is ticked when it is in DOAJ and
        either its creation date or its last update request is on or after the
        threshold.
        """
        created_date = self.created_date
        last_update_request = self.last_update_request

        tick_threshold = app.config.get("TICK_THRESHOLD", '2014-03-19T00:00:00Z')
        threshold = dates.parse(tick_threshold)

        if created_date is None:    # don't worry about the last_update_request date - you can't update unless you've been created!
            # we haven't even saved the record yet.  All we need to do is check that the tick
            # threshold is in the past (which I suppose theoretically it could not be), then
            # set it
            if dates.now() >= threshold:
                self.set_ticked(True)
            else:
                self.set_ticked(False)
            return

        # otherwise, this is an existing record, and we just need to update it

        # convert the strings to datetime objects
        created = dates.parse(created_date)
        lud = None
        if last_update_request is not None:
            lud = dates.parse(last_update_request)

        if created >= threshold and self.is_in_doaj():
            self.set_ticked(True)
            return

        if lud is not None and lud >= threshold and self.is_in_doaj():
            self.set_ticked(True)
            return

        self.set_ticked(False)

    def propagate_in_doaj_status_to_articles(self):
        """Copy this journal's in-DOAJ flag onto each of its articles and save them."""
        for article in self.all_articles():
            article.set_in_doaj(self.is_in_doaj())
            article.save()

    def prep(self):
        """Run the pre-save steps: in-DOAJ check, tick, index, APC flag, autocompletes, last_updated."""
        self._ensure_in_doaj()
        self.calculate_tick()
        self._generate_index()
        self._calculate_has_apc()
        self._generate_autocompletes()
        self.set_last_updated()

    def save(self, snapshot=True, sync_owner=True, **kwargs):
        """
        Prepare, validate and save the journal.

        :param snapshot: when True, store a history snapshot after saving
        :param sync_owner: when True, copy the owner to the current application first
        :param kwargs: passed to the parent ``save``
        :return: the result of the parent ``save``
        """
        self.prep()
        self.check_construct()
        if sync_owner:
            self._sync_owner_to_application()
        res = super(Journal, self).save(**kwargs)
        if snapshot:
            self.snapshot()
        return res

    ######################################################
    ## internal utility methods

    def _generate_autocompletes(self):
        """Write lower-cased publisher, institution and provider to the ``index.*_ac`` fields."""
        bj = self.bibjson()
        publisher = bj.publisher
        institution = bj.institution
        provider = bj.provider

        if publisher is not None:
            self._set_with_struct("index.publisher_ac", publisher.lower())

        if institution is not None:
            self._set_with_struct("index.institution_ac", institution.lower())

        if provider is not None:
            self._set_with_struct("index.provider_ac", provider.lower())

    def _calculate_has_apc(self):
        """
        Set ``index.has_apc``.

        "Yes" when the bibjson ``apc`` object has any keys; otherwise "No" for a
        ticked journal and "No Information" for an unticked one.
        """
        # work out if the journal has an apc
        has_apc = "No Information"
        apc_field_present = len(list(self.bibjson().apc.keys())) > 0
        if apc_field_present:
            has_apc = "Yes"
        elif self.is_ticked():
            has_apc = "No"

        self._set_with_struct("index.has_apc", has_apc)

    def _ensure_in_doaj(self):
        """Take the journal out of DOAJ when its bibjson is not active."""
        # switching active to false takes the item out of the DOAJ
        # though note that switching active to True does not put something IN the DOAJ
        if not self.bibjson().active:
            self.set_in_doaj(False)

    def _sync_owner_to_application(self):
        """Copy this journal's owner onto its current application when they differ."""
        if self.current_application is None:
            return
        from portality.models import Suggestion
        ca = Suggestion.pull(self.current_application)
        if ca is not None and ca.owner != self.owner:
            ca.set_owner(self.owner)
            # sync_owner=False is passed so the application's save skips its own owner sync
            ca.save(sync_owner=False)
762class JournalBibJSON(GenericBibJSON):
763 def __init__(self, bibjson=None):
764 self._add_struct(shared_structs.SHARED_BIBJSON.get("structs", {}).get("bibjson"))
765 self._add_struct(shared_structs.JOURNAL_BIBJSON_EXTENSION.get("structs", {}).get("bibjson"))
766 super(JournalBibJSON, self).__init__(bibjson)
768 ############################################################
769 # journal-specific simple property getter and setters
771 @property
772 def alternative_title(self):
773 return self._get_single("alternative_title")
775 @alternative_title.setter
776 def alternative_title(self, val):
777 self._set_with_struct("alternative_title", val)
779 @property
780 def author_pays_url(self):
781 return self._get_single("author_pays_url")
783 @author_pays_url.setter
784 def author_pays_url(self, val):
785 self._set_with_struct("author_pays_url", val)
787 @property
788 def author_pays(self):
789 return self._get_single("author_pays")
791 @author_pays.setter
792 def author_pays(self, val):
793 self._set_with_struct("author_pays", val)
795 @author_pays.deleter
796 def author_pays(self):
797 self._delete("author_pays")
799 @property
800 def country(self):
801 return self._get_single("country")
803 @country.setter
804 def country(self, val):
805 self._set_with_struct("country", val)
807 def country_name(self):
808 if self.country is not None:
809 return datasets.get_country_name(self.country)
810 return None
812 @property
813 def publisher(self):
814 return self._get_single("publisher")
816 @publisher.setter
817 def publisher(self, val):
818 self._set_with_struct("publisher", val)
820 @property
821 def provider(self):
822 return self._get_single("provider")
824 @provider.setter
825 def provider(self, val):
826 self._set_with_struct("provider", val)
828 @property
829 def institution(self):
830 return self._get_single("institution")
832 @institution.setter
833 def institution(self, val):
834 self._set_with_struct("institution", val)
836 @property
837 def active(self):
838 return self._get_single("active", default=True)
840 @active.setter
841 def active(self, val):
842 self._set_with_struct("active", val)
844 @property
845 def replaces(self):
846 return self._get_list("replaces")
848 @replaces.setter
849 def replaces(self, val):
850 self._set_with_struct("replaces", val)
852 @replaces.deleter
853 def replaces(self):
854 self._delete("replaces")
856 def add_replaces(self, val):
857 self._add_to_list_with_struct("replaces", val)
859 @property
860 def is_replaced_by(self):
861 return self._get_list("is_replaced_by")
863 @is_replaced_by.setter
864 def is_replaced_by(self, val):
865 self._set_with_struct("is_replaced_by", val)
867 @is_replaced_by.deleter
868 def is_replaced_by(self):
869 self._delete("is_replaced_by")
871 def add_is_replaced_by(self, val):
872 self._add_to_list_with_struct("is_replaced_by", val)
874 @property
875 def discontinued_date(self):
876 return self._get_single("discontinued_date")
878 @discontinued_date.setter
879 def discontinued_date(self, val):
880 self._set_with_struct("discontinued_date", val)
882 @discontinued_date.deleter
883 def discontinued_date(self):
884 self._delete("discontinued_date")
886 @property
887 def discontinued_datestamp(self):
888 return self._get_single("discontinued_date", coerce=dataobj.to_datestamp())
890 ########################################################
891 # journal-specific complex part getters and setters
893 @property
894 def language(self):
895 return self._get_list("language")
897 def language_name(self):
898 # copy the languages and convert them to their english forms
899 langs = [datasets.name_for_lang(l) for l in self.language]
900 uc = dataobj.to_unicode()
901 langs = [uc(l) for l in langs]
902 return list(set(langs))
904 def set_language(self, language):
905 self._set_with_struct("language", language)
907 def add_language(self, language):
908 self._add_to_list_with_struct("language", language)
910 def set_license(self, license_title, license_type, url=None, version=None, open_access=None,
911 by=None, sa=None, nc=None, nd=None,
912 embedded=None, embedded_example_url=None):
914 # FIXME: why is there not a "remove license" function
915 if not license_title and not license_type: # something wants to delete the license
916 self._delete("license")
917 return
919 lobj = {"title" : license_title, "type" : license_type}
920 if url is not None:
921 lobj["url"] = url
922 if version is not None:
923 lobj["version"] = version
924 if open_access is not None:
925 lobj["open_access"] = open_access
926 if by is not None:
927 lobj["BY"] = by
928 if sa is not None:
929 lobj["SA"] = sa
930 if nc is not None:
931 lobj["NC"] = nc
932 if nd is not None:
933 lobj["ND"] = nd
934 if embedded is not None:
935 lobj["embedded"] = embedded
936 if embedded_example_url is not None:
937 lobj["embedded_example_url"] = embedded_example_url
939 self._set_with_struct("license", [lobj])
942 def get_license(self):
943 ll = self._get_list("license")
944 if len(ll) > 0:
945 return ll[0]
946 return None
948 def get_license_type(self):
949 lobj = self.get_license()
950 if lobj is not None:
951 return lobj['type']
952 return None
954 @property
955 def open_access(self):
956 return self.get_license().get("open_access", False)
958 def set_open_access(self, open_access):
959 existing = self.get_license()
960 if existing is None:
961 existing = {}
962 existing["open_access"] = open_access
963 self._set_with_struct("license", existing)
965 def set_oa_start(self, year=None, *args, **kwargs):
966 """
967 Volume and Number are deprecated
968 """
969 oaobj = {}
970 if year is not None:
971 oaobj["year"] = year
972 self._set_with_struct("oa_start", oaobj)
974 @property
975 def oa_start(self):
976 return self._get_single("oa_start", default={})
978 def set_oa_end(self, year=None, *args, **kwargs):
979 """
980 Volume and Number are deprecated
981 """
982 oaobj = {}
983 if year is not None:
984 oaobj["year"] = year
985 self._set_with_struct("oa_end", oaobj)
987 @property
988 def oa_end(self):
989 return self._get_single("oa_end", default={})
991 def set_apc(self, currency, average_price):
992 self._set_with_struct("apc.currency", currency)
993 self._set_with_struct("apc.average_price", average_price)
995 @property
996 def apc_url(self):
997 return self._get_single("apc_url")
999 @apc_url.setter
1000 def apc_url(self, val):
1001 self._set_with_struct("apc_url", val)
1003 @property
1004 def apc(self):
1005 return self._get_single("apc", default={})
1007 def set_submission_charges(self, currency, average_price):
1008 self._set_with_struct("submission_charges.currency", currency)
1009 self._set_with_struct("submission_charges.average_price", average_price)
1011 @property
1012 def submission_charges_url(self):
1013 return self._get_single("submission_charges_url")
1015 @submission_charges_url.setter
1016 def submission_charges_url(self, val):
1017 self._set_with_struct("submission_charges_url", val)
1019 @property
1020 def submission_charges(self):
1021 return self._get_single("submission_charges", default={})
1023 """
1024 The below methods work with data stored in this format:
1025 {
1026 "other" : "other value",
1027 "nat_lib" : "library value",
1028 "known" : ["known values"],
1029 "url" : "<url>"
1030 }
1031 But they need to receive and expose data in the original external form:
1032 {
1033 "policy" : [
1034 "<known policy type (e.g. LOCKSS)>",
1035 ["<policy category>", "<previously unknown policy type>"]
1036 ],
1037 "url" : "<url to policy information page>"
1038 }
1039 """
1041 def set_archiving_policy(self, policies, policy_url):
1042 obj = {}
1043 known = []
1044 for p in policies:
1045 if isinstance(p, list):
1046 k, v = p
1047 if k.lower() == "other":
1048 obj["other"] = v
1049 elif k.lower() == "a national library":
1050 obj["nat_lib"] = v
1051 else:
1052 known.append(p)
1053 if len(known) > 0:
1054 obj["known"] = known
1055 if policy_url is not None:
1056 obj["url"] = policy_url
1058 self._set_with_struct("archiving_policy", obj)
1060 def add_archiving_policy(self, policy_name):
1061 if isinstance(policy_name, list):
1062 k, v = policy_name
1063 if k.lower() == "other":
1064 self._set_with_struct("archiving_policy.other", v)
1065 elif k.lower() == "a national library":
1066 self._set_with_struct("archiving_policy.nat_lib", v)
1067 else:
1068 self._add_to_list_with_struct("archiving_policy.known", policy_name)
1070 @property
1071 def archiving_policy(self):
1072 ap = self._get_single("archiving_policy", default={})
1073 ret = {"policy" : []}
1074 if "url" in ap:
1075 ret["url"] = ap["url"]
1076 if "known" in ap:
1077 ret["policy"] += ap["known"]
1078 if "nat_lib" in ap:
1079 ret["policy"].append(["A national library", ap["nat_lib"]])
1080 if "other" in ap:
1081 ret["policy"].append(["Other", ap["other"]])
1082 return ret
@property
def flattened_archiving_policies(self):
    """
    The stored archiving policies as a flat list.

    The "known" values come first, followed by "A national library: <value>"
    and "Other: <value>" when those are stored.
    """
    stored = self._get_single("archiving_policy", default={})

    flat = []
    if "known" in stored:
        flat.extend(stored["known"])
    if "nat_lib" in stored:
        flat.append("A national library: " + stored["nat_lib"])
    if "other" in stored:
        flat.append("Other: " + stored["other"])
    return flat
def set_editorial_review(self, process, review_url):
    """Store the editorial review process and its URL (in that order)."""
    updates = (
        ("editorial_review.process", process),
        ("editorial_review.url", review_url),
    )
    for path, value in updates:
        self._set_with_struct(path, value)
@property
def editorial_review(self):
    """The stored "editorial_review" object, or an empty dict by default."""
    review = self._get_single("editorial_review", default={})
    return review
def set_plagiarism_detection(self, url, has_detection=True):
    """Store the plagiarism detection flag (default True) and then its URL."""
    updates = (
        ("plagiarism_detection.detection", has_detection),
        ("plagiarism_detection.url", url),
    )
    for path, value in updates:
        self._set_with_struct(path, value)
@property
def plagiarism_detection(self):
    """The stored "plagiarism_detection" object, or an empty dict by default."""
    detection = self._get_single("plagiarism_detection", default={})
    return detection
def set_article_statistics(self, url, has_statistics=True):
    """Store the article statistics flag (default True) and then its URL."""
    updates = (
        ("article_statistics.statistics", has_statistics),
        ("article_statistics.url", url),
    )
    for path, value in updates:
        self._set_with_struct(path, value)
@property
def article_statistics(self):
    """The stored "article_statistics" object, or an empty dict by default."""
    statistics = self._get_single("article_statistics", default={})
    return statistics
@property
def deposit_policy(self):
    """The stored "deposit_policy" list."""
    policies = self._get_list("deposit_policy")
    return policies

@deposit_policy.setter
def deposit_policy(self, policies):
    """Replace the stored "deposit_policy" value with ``policies``."""
    self._set_with_struct("deposit_policy", policies)
def add_deposit_policy(self, policy):
    """Append one entry to the stored "deposit_policy" list."""
    target = "deposit_policy"
    self._add_to_list_with_struct(target, policy)
def set_author_copyright(self, url, holds_copyright=True):
    """Store the author copyright flag (default True) and then its URL."""
    updates = (
        ("author_copyright.copyright", holds_copyright),
        ("author_copyright.url", url),
    )
    for path, value in updates:
        self._set_with_struct(path, value)
@property
def author_copyright(self):
    """The stored "author_copyright" object, or an empty dict by default."""
    copyright_info = self._get_single("author_copyright", default={})
    return copyright_info
def set_author_publishing_rights(self, url, holds_rights=True):
    """Store the author publishing rights flag (default True) and then its URL."""
    updates = (
        ("author_publishing_rights.publishing_rights", holds_rights),
        ("author_publishing_rights.url", url),
    )
    for path, value in updates:
        self._set_with_struct(path, value)
@property
def author_publishing_rights(self):
    """The stored "author_publishing_rights" object, or an empty dict by default."""
    rights = self._get_single("author_publishing_rights", default={})
    return rights
@property
def allows_fulltext_indexing(self):
    """The stored "allows_fulltext_indexing" value."""
    allowed = self._get_single("allows_fulltext_indexing")
    return allowed

@allows_fulltext_indexing.setter
def allows_fulltext_indexing(self, allows):
    """Replace the stored "allows_fulltext_indexing" value with ``allows``."""
    self._set_with_struct("allows_fulltext_indexing", allows)
@property
def persistent_identifier_scheme(self):
    """The stored "persistent_identifier_scheme" list."""
    schemes = self._get_list("persistent_identifier_scheme")
    return schemes

@persistent_identifier_scheme.setter
def persistent_identifier_scheme(self, schemes):
    """Replace the stored "persistent_identifier_scheme" value with ``schemes``."""
    self._set_with_struct("persistent_identifier_scheme", schemes)
def add_persistent_identifier_scheme(self, scheme):
    """Append one entry to the stored "persistent_identifier_scheme" list."""
    target = "persistent_identifier_scheme"
    self._add_to_list_with_struct(target, scheme)
@property
def format(self):
    """The stored "format" list."""
    formats = self._get_list("format")
    return formats

@format.setter
def format(self, form):
    """Replace the stored "format" value with ``form``."""
    self._set_with_struct("format", form)
def add_format(self, form):
    """Append one entry to the stored "format" list."""
    target = "format"
    self._add_to_list_with_struct(target, form)
@property
def publication_time(self):
    """The stored "publication_time" value."""
    stored = self._get_single("publication_time")
    return stored

@publication_time.setter
def publication_time(self, weeks):
    """
    Replace the stored "publication_time" value.

    NOTE(review): the parameter name suggests a number of weeks - confirm
    against the struct definition for this field.
    """
    self._set_with_struct("publication_time", weeks)
def get_preferred_issn(self):
    """
    Return the identifier used to refer to this journal (e.g. for the ToC).

    The E-ISSN is preferred; the P-ISSN is looked up only when the E-ISSN
    lookup returns a falsy value.
    """
    return self.get_one_identifier(self.E_ISSN) or self.get_one_identifier(self.P_ISSN)
# Structure definition for the journal record outside of its bibjson:
# top-level fields plus the "admin" and "index" sub-objects.
JOURNAL_STRUCT = {
    "fields": {
        "id": {"coerce": "unicode"},
        "created_date": {"coerce": "utcdatetime"},
        "last_updated": {"coerce": "utcdatetime"},
        "last_reapplication": {"coerce": "utcdatetime"},
        "last_manual_update": {"coerce": "utcdatetime"}
    },
    "objects": ["admin", "index"],

    "structs": {
        # administrative data attached to the record
        "admin": {
            "fields": {
                "in_doaj": {"coerce": "bool"},
                "ticked": {"coerce": "bool"},
                "seal": {"coerce": "bool"},
                "bulk_upload": {"coerce": "unicode"},
                "owner": {"coerce": "unicode"},
                "editor_group": {"coerce": "unicode"},
                "editor": {"coerce": "unicode"},
                "current_application": {"coerce": "unicode"}
            },
            "lists": {
                "contact": {"contains": "object"},
                "notes": {"contains": "object"},
                "related_applications": {"contains": "object"}
            },
            "structs": {
                "contact": {
                    "fields": {
                        "email": {"coerce": "unicode"},
                        "name": {"coerce": "unicode"}
                    }
                },
                "notes": {
                    "fields": {
                        "note": {"coerce": "unicode"},
                        "date": {"coerce": "utcdatetime"}
                    }
                },
                "related_applications": {
                    "fields": {
                        "application_id": {"coerce": "unicode"},
                        "date_accepted": {"coerce": "utcdatetime"},
                        "status": {"coerce": "unicode"}
                    }
                }
            }
        },
        # values held under "index"; every one is coerced to unicode
        "index": {
            "fields": {
                "country": {"coerce": "unicode"},
                "homepage_url": {"coerce": "unicode"},
                "waiver_policy_url": {"coerce": "unicode"},
                "editorial_board_url": {"coerce": "unicode"},
                "aims_scope_url": {"coerce": "unicode"},
                "author_instructions_url": {"coerce": "unicode"},
                "oa_statement_url": {"coerce": "unicode"},
                "has_apc": {"coerce": "unicode"},
                "has_seal": {"coerce": "unicode"},
                "unpunctitle": {"coerce": "unicode"},
                "asciiunpunctitle": {"coerce": "unicode"},
                "continued": {"coerce": "unicode"},
                "has_editor_group": {"coerce": "unicode"},
                "has_editor": {"coerce": "unicode"},
                "publisher_ac": {"coerce": "unicode"},
                "institution_ac": {"coerce": "unicode"},
                "provider_ac": {"coerce": "unicode"}
            },
            "lists": {
                "issn": {"contains": "field", "coerce": "unicode"},
                "title": {"contains": "field", "coerce": "unicode"},
                "subject": {"contains": "field", "coerce": "unicode"},
                "schema_subject": {"contains": "field", "coerce": "unicode"},
                "classification": {"contains": "field", "coerce": "unicode"},
                "language": {"contains": "field", "coerce": "unicode"},
                "license": {"contains": "field", "coerce": "unicode"},
                "classification_paths": {"contains": "field", "coerce": "unicode"},
                "schema_code": {"contains": "field", "coerce": "unicode"},
                "publisher": {"contains": "field", "coerce": "unicode"}
            }
        }
    }
}
# Options used when turning the data object structure into an index mapping.
MAPPING_OPTS = {
    "dynamic": None,
    "coerces": app.config["DATAOBJ_TO_MAPPING_DEFAULTS"],
    "exceptions": {
        # notes are mapped as text and excluded from the index
        "admin.notes.note": {
            "type": "text",
            "index": False,
            # "include_in_all": False  # Removed in es6 fixme: do we need to look at copy_to for the mapping?
        }
    }
}
1293########################################################
1294## Data Access Queries
class JournalQuery(object):
    """
    wrapper around the kinds of queries we want to do against the journal type

    The class-level dictionaries are templates: every query handed out is built
    from a deep copy, so the templates themselves are never modified.
    """
    # template for looking journals up by ISSN; the "<issn>" placeholder is
    # replaced with the list of ISSNs in find_by_issn
    issn_query = {
        "track_total_hits" : True,
        "query": {
            "bool": {
                "must": [
                    {
                        "terms" : { "index.issn.exact" : "<issn>" }
                    }
                ]
            }
        }
    }

    # template matching every record with admin.in_doaj set to True
    all_doaj = {
        "track_total_hits" : True,
        "query" : {
            "bool" : {
                "must" : [
                    {"term" : {"admin.in_doaj" : True}}
                ]
            }
        }
    }

    # the fields requested when a minified result is asked for
    _minified_fields = ["id", "bibjson.title", "last_updated"]

    def __init__(self, minified=False, sort_by_title=False):
        """
        :param minified: if True, all_in_doaj() restricts the returned fields
            to _minified_fields
        :param sort_by_title: if True, all_in_doaj() sorts ascending on
            bibjson.title.exact
        """
        self.query = None
        self.minified = minified
        self.sort_by_title = sort_by_title

    def find_by_issn(self, issns, in_doaj=None, max=10):
        """
        Build an ISSN lookup and store it on self.query (nothing is returned).

        :param issns: value placed in the "terms" clause on index.issn.exact
        :param in_doaj: when not None, adds a "term" clause on admin.in_doaj
            with this value
        :param max: the "size" of the query
        """
        self.query = deepcopy(self.issn_query)
        must = self.query["query"]["bool"]["must"]
        must[0]["terms"]["index.issn.exact"] = issns
        if in_doaj is not None:
            must.append({"term" : {"admin.in_doaj" : in_doaj}})
        self.query["size"] = max

    def all_in_doaj(self):
        """
        Return a new query for all records in DOAJ, honouring the minified and
        sort_by_title options given to the constructor.
        """
        q = deepcopy(self.all_doaj)
        if self.minified:
            # copy the list so that a caller changing the returned query
            # cannot alter the shared class-level _minified_fields
            q["fields"] = list(self._minified_fields)
        if self.sort_by_title:
            q["sort"] = [{"bibjson.title.exact" : {"order" : "asc"}}]
        return q
class IssnQuery(object):
    """
    Query for records with a given owner, aggregating on index.issn.exact.

    No hits are requested ("size" 0); the ISSNs come back as the buckets of
    the "issns" terms aggregation, ordered by key ascending.
    """
    base_query = {
        "track_total_hits": True,
        "query": {
            "term": {"admin.owner.exact": "<owner id here>"}
        },
        "size": 0,
        "aggs": {
            "issns": {
                "terms": {
                    "field": "index.issn.exact",
                    "size": 10000,
                    "order": {"_key": "asc"}
                }
            }
        }
    }

    def __init__(self, owner):
        """Build the query for ``owner`` from a copy of the template."""
        built = deepcopy(self.base_query)
        built["query"]["term"]["admin.owner.exact"] = owner
        self._query = built

    def query(self):
        """Return the query built at construction time."""
        return self._query
class PublisherQuery(object):
    """
    Term query on the publisher name, against either index.publisher.exact
    or index.publisher.
    """
    exact_query = {
        "track_total_hits": True,
        "query": {
            "term": {"index.publisher.exact": "<publisher name here>"}
        },
        "size": 10000
    }

    inexact_query = {
        "track_total_hits": True,
        "query": {
            "term": {"index.publisher": "<publisher name here>"}
        },
        "size": 10000
    }

    def __init__(self, publisher, exact=True):
        """
        :param publisher: the publisher name to search for
        :param exact: selects the exact (True) or inexact (False) template
        """
        self.publisher = publisher
        self.exact = exact

    def query(self):
        """
        Return a new query dict for the publisher.

        The exact form uses the name as given; the inexact form lower-cases it.
        """
        if self.exact:
            built = deepcopy(self.exact_query)
            built["query"]["term"]["index.publisher.exact"] = self.publisher
            return built

        built = deepcopy(self.inexact_query)
        built["query"]["term"]["index.publisher"] = self.publisher.lower()
        return built
class TitleQuery(object):
    """Term query on index.title.exact for a given title."""
    base_query = {
        "track_total_hits": True,
        "query": {
            "term": {"index.title.exact": "<title here>"}
        },
        "size": 10000
    }

    def __init__(self, title):
        """:param title: the title to search for"""
        self.title = title

    def query(self):
        """Return a new query dict with the title filled into the template."""
        built = deepcopy(self.base_query)
        term = built["query"]["term"]
        term["index.title.exact"] = self.title
        return built
class ContinuationQuery(object):
    """Terms query on index.issn.exact for a set of ISSNs."""

    def __init__(self, issns):
        """:param issns: the value placed in the "terms" clause"""
        self.issns = issns

    def query(self):
        """Return a new query dict matching any of the ISSNs (size 10000)."""
        issn_clause = {"terms": {"index.issn.exact": self.issns}}
        return {
            "track_total_hits": True,
            "query": {
                "bool": {
                    "must": [issn_clause]
                }
            },
            "size": 10000
        }
class ArticleStatsQuery(object):
    """
    Query for the single most recently created record that matches the given
    ISSNs and has admin.in_doaj set to True; only created_date is requested.
    """

    def __init__(self, issns):
        """:param issns: the value placed in the "terms" clause"""
        self.issns = issns

    def query(self):
        """Return a new query dict (size 1, sorted by created_date descending)."""
        must_clauses = [
            {"terms": {"index.issn.exact": self.issns}},
            {"term": {"admin.in_doaj": True}}
        ]
        return {
            "track_total_hits": True,
            "query": {
                "bool": {
                    "must": must_clauses
                }
            },
            "size": 1,
            "_source": {
                "include": ["created_date"]
            },
            "sort": [{"created_date": {"order": "desc"}}]
        }