Coverage for portality/crosswalks/journal_questions.py: 65%
176 statements
« prev ^ index » next coverage.py v6.4.2, created at 2022-07-19 18:38 +0100
« prev ^ index » next coverage.py v6.4.2, created at 2022-07-19 18:38 +0100
1from copy import deepcopy
2from portality import datasets
3from portality.crosswalks.journal_form import JournalFormXWalk
4from portality.forms.application_forms import ApplicationFormFactory
class JournalXwalkException(Exception):
    """Exception type declared alongside the journal/question crosswalk."""
class Journal2QuestionXwalk(object):
    """
    ~~JournalQuestions:Crosswalk->Journal:Form~~

    Crosswalk between a journal record and the question/answer columns of the
    Journal CSV.

    QTUP pairs each form field key with the CSV column heading used for it.
    DEGEN maps the "<field>_other" form keys onto the field whose column they
    share, so that both resolve to the same heading.
    """
    QTUP = [
        ("alternative_title", "Alternative title"),
        ("apc_charges", "APC amount"),
        ("apc_url", "APC information URL"),
        ("preservation_service", "Preservation Services"),
        ("preservation_service_library", "Preservation Service: national library"),
        ("preservation_service_url", "Preservation information URL"),
        ("copyright_author_retains", "Author holds copyright without restrictions"),
        ("copyright_url", "Copyright information URL"),
        ("publisher_country", "Country of publisher"),
        ("deposit_policy", "Deposit policy directory"),
        ("review_process", "Review process"),
        ("review_url", "Review process information URL"),
        ("pissn", "Journal ISSN (print version)"),
        ("eissn", "Journal EISSN (online version)"),
        ("continues", "Continues"),
        ("continued_by", "Continued By"),
        ("institution_name", "Society or institution"),
        ("keywords", "Keywords"),
        ("language", "Languages in which the journal accepts manuscripts"),
        ("license_attributes", "License attributes"),
        ("license_display", "Machine-readable CC licensing information embedded or displayed in articles"),
        ("license_display_example_url", "URL to an example page with embedded licensing information"),
        ("boai", "Does the journal comply to DOAJ's definition of open access?"),
        ("license", "Journal license"),
        ("license_terms_url", "URL for license terms"),
        ("oa_statement_url", "URL for journal's Open Access statement"),
        ("oa_start", "When did the journal start to publish all content using an open license?"),
        ("journal_url", "Journal URL"),
        ("aims_scope_url", "URL for journal's aims & scope"),
        ("editorial_board_url", "URL for the Editorial Board page"),
        ("author_instructions_url", "URL for journal's instructions for authors"),
        ("waiver_url", "Waiver policy information URL"),
        ("persistent_identifiers", "Persistent article identifiers"),
        ("plagiarism_detection", "Journal plagiarism screening policy"),
        ("plagiarism_url", "Plagiarism information URL"),
        ("publication_time_weeks", "Average number of weeks between article submission and publication"),
        ("publisher_name", "Publisher"),
        ("other_charges_url", "Other fees information URL"),
        ("title", "Journal title"),
        ("institution_country", "Country of society or institution"),
        ("apc", "APC"),
        ("has_other_charges", "Has other fees"),
        ("has_waiver", "Journal waiver policy (for developing country authors etc)"),
        ("orcid_ids", "Article metadata includes ORCIDs"),
        ("open_citations", "Journal complies with I4OC standards for open citations"),
        ("deposit_policy_url", "URL for deposit policy"),
        ("subject", "LCC Codes")
    ]

    DEGEN = {
        "preservation_service_other": "preservation_service",
        "deposit_policy_other": "deposit_policy",
        "review_process_other": "review_process",
        "persistent_identifiers_other": "persistent_identifiers"
    }

    @classmethod
    def q(cls, ident):
        """
        Get the CSV column heading for a form field key.

        :param ident: form field key; "<field>_other" keys listed in DEGEN resolve to their main field
        :return: the heading string, or None if the key is not in QTUP
        """
        ident = cls.DEGEN.get(ident, ident)
        for key, question in cls.QTUP:
            if key == ident:
                return question
        return None

    @classmethod
    def q2idx(cls, ident):
        """
        Get the position of a form field key within QTUP.

        :param ident: form field key; DEGEN keys resolve to their main field
        :return: zero-based index into QTUP, or -1 if the key is not present
        """
        ident = cls.DEGEN.get(ident, ident)
        for idx, (key, _) in enumerate(cls.QTUP):
            if key == ident:
                return idx
        return -1

    @classmethod
    def p(cls, ident):
        """
        p is a backwards q - i.e. get the form field key from the CSV heading

        :param ident: CSV column heading
        :return: the form field key, or None if the heading is not in QTUP
        """
        # DEGEN is deliberately not consulted here: it relates form keys to each
        # other, whereas ident is a heading, so it can never select a DEGEN entry
        # that leads to a match.
        for key, question in cls.QTUP:
            if question == ident:
                return key
        return None

    @classmethod
    def question_list(cls):
        """Return every CSV column heading, in QTUP order."""
        return [question for _, question in cls.QTUP]

    @classmethod
    def journal2question(cls, journal):
        """
        Convert the journal data to key value pairs for use in the Journal CSV

        :param journal: the journal object, converted via JournalFormXWalk.obj2form
        :return: list of (column heading, value) tuples in output order
        """
        # kept as a list (not a set) so unhashable values can still be tested with "in"
        affirmative = [True, "True", "Yes", "true", "yes", "y"]

        def yes_no_or_blank(val):
            if val in affirmative:
                return "Yes"
            return "No" if val is not None else ""

        def yes_or_blank(val):
            return "Yes" if val in affirmative else ''

        def other_list(main_field, other_field, other_value):
            selected = forminfo.get(main_field)
            if selected is None or selected == "" or selected == "None":
                selected = []
            elif isinstance(selected, str):
                # a bare string would otherwise be joined character by character
                selected = [selected]

            # Build a new list rather than editing the one held by forminfo.
            # if the xwalk has returned a single-list element like ["none"]
            # we want to strip that "none" for the purpose of the CSV, and the
            # "other" marker is replaced by the free-text value below
            selected = [s for s in selected if s != "none" and s != other_value]

            other_text = forminfo.get(other_field)
            if other_text is not None and other_text != "" and other_text != "None":
                selected.append(other_text)
            return ", ".join(selected)

        def license_checkbox(val):
            labels = dict(ApplicationFormFactory.choices_for("license_attributes"))
            # fall back to the stored value if it has no matching label, so that an
            # unknown attribute doesn't put None into the join
            return ", ".join(labels.get(v, v) for v in val)

        def languages(vals):
            codes = {c.lower() for c, _ in datasets.language_options}
            names = {n.lower() for _, n in datasets.language_options}
            keep = []
            for v in vals:
                lowered = v.lower()
                if lowered in codes:
                    keep.append(datasets.name_for_lang(v))
                elif lowered in names:
                    keep.append(v)
            return ", ".join(keep)

        # start by converting the object to the forminfo version
        forminfo = JournalFormXWalk.obj2form(journal)

        kvs = []

        def add(key, value):
            kvs.append((cls.q(key), value))

        def add_raw(key):
            add(key, forminfo.get(key))

        def list_of(key):
            # tolerate both a missing key and an explicit None
            return forminfo.get(key) or []

        # create key/value pairs for the questions in order
        # About
        add_raw("title")
        add_raw("journal_url")
        add_raw("oa_start")
        add_raw("alternative_title")
        add_raw("pissn")
        add_raw("eissn")
        add("keywords", ", ".join(list_of("keywords")))
        add("language", languages(list_of("language")))
        add_raw("publisher_name")
        add("publisher_country", datasets.get_country_name(forminfo.get("publisher_country")))
        add_raw("institution_name")
        add("institution_country", datasets.get_country_name(forminfo.get("institution_country")))

        # Copyright & licensing
        add("license", ", ".join(list_of("license")))
        add("license_attributes", license_checkbox(list_of("license_attributes")))
        add_raw("license_terms_url")
        add("license_display", yes_or_blank(forminfo.get("license_display")))
        add_raw("license_display_example_url")

        add("copyright_author_retains", yes_no_or_blank(forminfo.get("copyright_author_retains")))
        add_raw("copyright_url")

        # Editorial
        add("review_process", other_list("review_process", "review_process_other", "other"))
        add_raw("review_url")
        add("plagiarism_detection", yes_no_or_blank(forminfo.get("plagiarism_detection")))
        add_raw("plagiarism_url")
        add_raw("aims_scope_url")
        add_raw("editorial_board_url")
        add_raw("author_instructions_url")
        # a missing value is written as blank rather than the literal text "None"
        weeks = forminfo.get("publication_time_weeks")
        add("publication_time_weeks", "" if weeks is None else str(weeks))

        # Business Model
        add("apc", yes_no_or_blank(forminfo.get("apc")))
        add_raw("apc_url")
        apcs = []
        for apc_charge in list_of("apc_charges"):
            parts = (apc_charge.get("apc_max"), apc_charge.get("apc_currency"))
            # skip absent parts so a missing currency cannot break the concatenation
            apcs.append(" ".join(str(part) for part in parts if part is not None))
        add("apc_charges", "; ".join(apcs))

        add("has_waiver", yes_no_or_blank(forminfo.get("has_waiver")))
        add_raw("waiver_url")
        add("has_other_charges", yes_no_or_blank(forminfo.get("has_other_charges")))
        add_raw("other_charges_url")

        # Best Practice
        dap = deepcopy(list_of("preservation_service"))
        if "national_library" in dap:
            dap.remove("national_library")
        if "other" in dap:
            dap.remove("other")
            preservation_other = forminfo.get("preservation_service_other")
            # only add the free text if there is some, otherwise the join fails
            if preservation_other:
                dap.append(preservation_other)
        if "none" in dap:
            dap.remove("none")
        add("preservation_service", ", ".join(dap))
        add("preservation_service_library", "; ".join(list_of("preservation_service_library")))
        add_raw("preservation_service_url")

        add("deposit_policy", other_list("deposit_policy", "deposit_policy_other", "other"))
        add_raw("deposit_policy_url")

        add("persistent_identifiers", other_list("persistent_identifiers", "persistent_identifiers_other", "other"))
        add("orcid_ids", yes_no_or_blank(forminfo.get("orcid_ids")))
        add("open_citations", yes_no_or_blank(forminfo.get("open_citations")))

        # Open Access Compliance (usually first on form)
        add("boai", yes_or_blank(forminfo.get("boai")))
        add_raw("oa_statement_url")

        add("continues", ", ".join(list_of("continues")))
        add("continued_by", ", ".join(list_of("continued_by")))

        add("subject", "|".join(list_of("subject")))

        return kvs

    @classmethod
    def question2form(cls, journal, questions):
        """ Create a Journal (update) form from the CSV questions
        :param journal - A journal object to base the form on
        :param questions - a dict of header and value as provided by csv.DictReader
            {'header': 'answer', 'header2': 'answer2'...}

        :return: A MultiDict of updates to the form for validation and a list of
            strings describing what was changed
        """

        def _y_or_blank(x):
            """ Some of the work of undoing yes_or_blank() """
            return 'y' if x == 'Yes' else ''

        def _y_n_or_blank(x):
            """ Undoing yes_no_or_blank() to 'y' or 'n' ONLY """
            if x == 'Yes':
                return 'y'
            return 'n' if x != '' else None

        def _comma_to_list(x):
            """ Comma separated string to list of stripped items (empty items are dropped) """
            stripped = (item.strip() for item in x.split(','))
            return [item for item in stripped if item]

        def _lang_codes(x):
            """ Get the uppercase 2-char language string for each comma separated language name"""
            langs = [datasets.language_for(name) for name in _comma_to_list(x)]
            return [lang.alpha_2.upper() for lang in langs if lang is not None]

        def _unfurl_apc(x):
            """ Allow an APC update by splitting the APC string from the spreadsheet """
            apcs = []
            # split on the bare semicolon so that "100 GBP;200 USD" and a trailing
            # ";" are accepted as well as the "; " separator that the export writes
            for apc in x.split(';'):
                apc = apc.strip()
                if not apc:
                    continue
                parts = apc.split()
                if len(parts) != 2:
                    raise ValueError('APC entry "{0}" is not in the form "<amount> <currency>"'.format(apc))
                amt, cur = parts
                apcs.append({'apc_max': round(float(amt)), 'apc_currency': cur})
            return apcs

        # Undo the transformations applied to specific fields. TODO: Add these as they are encountered in the wild
        REVERSE_TRANSFORM_MAP = {
            'keywords': _comma_to_list,
            'language': _lang_codes,
            'publisher_country': datasets.get_country_code,
            'institution_country': datasets.get_country_code,
            'license': _comma_to_list,
            'license_display': _y_or_blank,
            'copyright_author_retains': _y_n_or_blank,
            'review_process': _comma_to_list,
            'plagiarism_detection': _y_n_or_blank,
            'publication_time_weeks': lambda x: round(float(x)),
            # journal2question writes this column with yes_no_or_blank(), so "No"
            # has to come back as 'n' rather than blank
            'apc': _y_n_or_blank,
            'apc_charges': _unfurl_apc,
            'has_waiver': _y_n_or_blank,
            'has_other_charges': _y_n_or_blank,
            'deposit_policy': _comma_to_list,
            'preservation_service': _comma_to_list,
            'persistent_identifiers': _comma_to_list,
            'orcid_ids': _y_n_or_blank,
            'open_citations': _y_n_or_blank,
            'boai': _y_or_blank,
        }

        def csv2formval(key, value):
            # We keep getting naff data with trailing whitespace, so strip those out
            if isinstance(value, str):
                value = value.strip()

            # Apply the reverse transformation back from display value to storage value.
            # The lookup is kept apart from the call so that a KeyError raised inside a
            # transform is not mistaken for "this field has no transform".
            transform = REVERSE_TRANSFORM_MAP.get(key)
            if transform is None:
                # This field doesn't appear in the map, return unchanged.
                return value
            return transform(value)

        # start by converting the object to the forminfo version
        # ~~->JournalForm:Crosswalk~~
        forminfo = JournalFormXWalk.obj2form(journal)

        # Get the CSV output this journal currently produces so that we can skip over unchanged fields
        current_csv = dict(cls.journal2question(journal))

        # Collect the update report
        updates = []

        for k, v in questions.items():
            # To save us writing all of the reverse transforms, we can skip this question entirely if unchanged from current journal
            if k in current_csv and current_csv[k] == v:
                continue

            # Only deal with a question if there's a value - TODO: what does this mean for yes_no_or_blank?
            # csv.DictReader can supply None (short rows) or a list (surplus cells); neither is an answer
            if not isinstance(v, str) or not v.strip():
                continue

            # Get the question key from the CSV column header
            form_key = cls.p(k)

            # Account for columns that might not correspond to the form
            if form_key is None:
                continue

            # Only update if the value is changed (apply the reverse transformation to get the form value back)
            current_val = forminfo.get(form_key)
            update_val = csv2formval(form_key, v)
            if current_val != update_val:
                updates.append('Updating {0}, from "{1}" to "{2}"'.format(form_key, current_val, update_val))
                forminfo[form_key] = update_val

        return JournalFormXWalk.forminfo2multidict(forminfo), updates