Coverage for portality / crosswalks / journal_questions.py: 92%
177 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-05 00:09 +0100
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-05 00:09 +0100
1from copy import deepcopy
2from portality import datasets
3from portality.crosswalks.journal_form import JournalFormXWalk
4from portality.forms.application_forms import ApplicationFormFactory
class JournalXwalkException(Exception):
    """Base class for errors raised by the journal question crosswalks in this module."""
class QuestionTransformError(JournalXwalkException):
    """
    Raised when a CSV answer cannot be converted back into a form value.

    :param key: the form field key whose value failed to convert
    :param value: the offending value
    :param message: human-readable description, passed on to the base exception
    """

    def __init__(self, key, value, message):
        # Record the failing field and its value so callers can report them
        self.key = key
        self.value = value
        super().__init__(message)
class Journal2QuestionXwalk(object):
    """
    ~~JournalQuestions:Crosswalk->Journal:Form~~

    Crosswalk between a journal and the question/answer pairs used in the Journal CSV.

    ``QTUP`` pairs each form field key with the CSV column heading (the "question") and
    ``DEGEN`` maps the "*_other" free-text field keys onto the primary field whose question
    they share.
    """
    QTUP = [
        ("alternative_title", "Alternative title"),
        ("apc_charges", "APC amount"),
        ("apc_url", "APC information URL"),
        ("preservation_service", "Preservation Services"),
        ("preservation_service_library", "Preservation Service: national library"),
        ("preservation_service_url", "Preservation information URL"),
        ("copyright_author_retains", "Author holds copyright without restrictions"),
        ("copyright_url", "Copyright information URL"),
        ("publisher_country", "Country of publisher"),
        ("deposit_policy", "Deposit policy directory"),
        ("review_process", "Review process"),
        ("review_url", "Review process information URL"),
        ("pissn", "Journal ISSN (print version)"),
        ("eissn", "Journal EISSN (online version)"),
        ("continues", "Continues"),
        ("continued_by", "Continued By"),
        ("institution_name", "Other organisation"),
        ("keywords", "Keywords"),
        ("language", "Languages in which the journal accepts manuscripts"),
        ("license_attributes", "License attributes"),
        ("license_display", "Machine-readable CC licensing information embedded or displayed in articles"),
        ("boai", "Does the journal comply to DOAJ's definition of open access?"),
        ("license", "Journal license"),
        ("license_terms_url", "URL for license terms"),
        ("oa_start", "When did the journal start to publish all content using an open license?"),
        ("journal_url", "Journal URL"),
        ("aims_scope_url", "URL for journal's aims & scope"),
        ("editorial_board_url", "URL for the Editorial Board page"),
        ("author_instructions_url", "URL for journal's instructions for authors"),
        ("waiver_url", "Waiver policy information URL"),
        ("persistent_identifiers", "Persistent article identifiers"),
        ("plagiarism_detection", "Journal plagiarism screening policy"),
        ("publication_time_weeks", "Average number of weeks between article submission and publication"),
        ("publisher_name", "Publisher"),
        ("other_charges_url", "Other fees information URL"),
        ("title", "Journal title"),
        ("institution_country", "Country of other organisation"),
        ("apc", "APC"),
        ("has_other_charges", "Has other fees"),
        ("has_waiver", "Journal waiver policy (for developing country authors etc)"),
        ("deposit_policy_url", "URL for deposit policy"),
        ("subject", "LCC Codes"),
        ("s2o", "Subscribe to Open"),
        ("mirror", "Mirror Journal"),
        ("ojc", "Open Journals Collective"),
    ]

    DEGEN = {
        "preservation_service_other": "preservation_service",
        "deposit_policy_other": "deposit_policy",
        "review_process_other": "review_process",
        "persistent_identifiers_other": "persistent_identifiers"
    }

    @classmethod
    def q(cls, ident, val=None):
        """
        Get the question (CSV heading) for a form field key.

        :param ident: form field key; "*_other" keys listed in DEGEN resolve to their primary field
        :param val: unused, kept for backwards compatibility with existing callers
        :return: the question text, or None if the key is not in QTUP
        """
        ident = cls.DEGEN.get(ident, ident)
        for key, question in cls.QTUP:
            if key == ident:
                return question
        return None

    @classmethod
    def p(cls, ident):
        """
        p is a backwards q - i.e. get the form field key from the CSV heading

        :param ident: the CSV column heading (matched case-insensitively)
        :return: the form field key, or None if no question matches
        """
        # If we were handed one of the primary field keys from DEGEN (rather than a heading),
        # swap it for its "*_other" key before matching.
        # NOTE(review): the match below is against question text, not keys, so with the QTUP
        # defined here this substitution does not appear to produce a result - confirm intent.
        if ident in cls.DEGEN.values():
            for other_key, main_key in cls.DEGEN.items():
                if ident.lower() == main_key.lower():
                    ident = other_key

        wanted = ident.lower()
        for key, question in cls.QTUP:
            if question.lower() == wanted:
                return key
        return None

    @classmethod
    def question_list(cls):
        """ List every question (CSV heading) in QTUP order """
        return [question for _, question in cls.QTUP]

    @classmethod
    def journal2question(cls, journal):
        """
        Convert the journal data to key value pairs for use in the Journal CSV

        :param journal: the journal object, converted via JournalFormXWalk.obj2form
        :return: a list of (question, answer) tuples, in a fixed order
        """
        affirmative = [True, "True", "Yes", "true", "yes", "y"]

        def yes_no_or_blank(val):
            """ "Yes" for affirmative values, "" for None, "No" for anything else """
            if val in affirmative:
                return "Yes"
            return "No" if val is not None else ""

        def yes_or_blank(val):
            """ "Yes" for affirmative values, otherwise blank """
            return "Yes" if val in affirmative else ''

        def other_list(main_field, other_field, other_value):
            """ Comma-join a multi-choice field, replacing the "other" marker with its free text """
            aids = forminfo.get(main_field, [])
            if aids is None or aids == "" or aids == "None":
                aids = []

            # work on a copy so that the list held by forminfo is never modified
            aids = list(aids)

            # if the xwalk has returned a single-list element like ["None"]
            # we want to strip that "None" for the purpose of the CSV
            if "none" in aids:
                aids.remove("none")

            aidother = forminfo.get(other_field)

            if other_value in aids:
                aids.remove(other_value)
            if aidother is not None and aidother != "" and aidother != "None":
                aids.append(aidother)
            return ", ".join(aids)

        def license_checkbox(val):
            """ Translate license attribute values to their form labels, comma-joined """
            opts = {k: v for k, v in ApplicationFormFactory.choices_for("license_attributes")}
            # values with no matching choice are rendered as "None", as before
            return ", ".join(str(opts.get(v)) for v in val)

        def languages(vals):
            """ Render language codes as names, keep recognised names, drop anything else """
            codes = {c.lower() for c, _ in datasets.language_options}
            names = {n.lower() for _, n in datasets.language_options}
            keep = []
            for v in vals:
                lowered = v.lower()
                if lowered in codes:
                    keep.append(datasets.name_for_lang(v))
                elif lowered in names:
                    keep.append(v)
            return ", ".join(keep)

        def multi(field):
            """ Read a list-valued field, treating a missing or None value as empty """
            return forminfo.get(field) or []

        # start by converting the object to the forminfo version
        forminfo = JournalFormXWalk.obj2form(journal)

        kvs = []

        def add(field, answer):
            kvs.append((cls.q(field), answer))

        # create key/value pairs for the questions in order
        # About
        add("title", forminfo.get("title"))
        add("journal_url", forminfo.get("journal_url"))
        add("oa_start", forminfo.get("oa_start"))
        add("alternative_title", forminfo.get("alternative_title"))
        add("pissn", forminfo.get("pissn"))
        add("eissn", forminfo.get("eissn"))
        add("keywords", ", ".join(multi("keywords")))
        add("language", languages(multi("language")))
        add("publisher_name", forminfo.get("publisher_name"))
        add("publisher_country", datasets.get_country_name(forminfo.get("publisher_country")))
        add("institution_name", forminfo.get("institution_name"))
        add("institution_country", datasets.get_country_name(forminfo.get("institution_country")))

        # Copyright & licensing
        add("license", ", ".join(multi("license")))
        add("license_attributes", license_checkbox(multi("license_attributes")))
        add("license_terms_url", forminfo.get("license_terms_url"))
        add("license_display", yes_or_blank(forminfo.get("license_display")))
        add("copyright_author_retains", yes_no_or_blank(forminfo.get("copyright_author_retains")))
        add("copyright_url", forminfo.get("copyright_url"))

        # Editorial
        add("review_process", other_list("review_process", "review_process_other", "other"))
        add("review_url", forminfo.get("review_url"))
        add("plagiarism_detection", yes_no_or_blank(forminfo.get("plagiarism_detection")))
        add("aims_scope_url", forminfo.get("aims_scope_url"))
        add("editorial_board_url", forminfo.get("editorial_board_url"))
        add("author_instructions_url", forminfo.get("author_instructions_url"))
        # NOTE(review): a missing value is rendered as the string "None" here - confirm that is wanted
        add("publication_time_weeks", str(forminfo.get("publication_time_weeks")))

        # Business Model
        add("apc", yes_no_or_blank(forminfo.get("apc")))
        add("apc_url", forminfo.get("apc_url"))
        apcs = [
            str(apc_charge.get("apc_max")) + " " + apc_charge.get("apc_currency")
            for apc_charge in multi("apc_charges")
        ]
        add("apc_charges", "; ".join(apcs))

        add("has_waiver", yes_no_or_blank(forminfo.get("has_waiver")))
        add("waiver_url", forminfo.get("waiver_url"))
        add("has_other_charges", yes_no_or_blank(forminfo.get("has_other_charges")))
        add("other_charges_url", forminfo.get("other_charges_url"))

        # Best Practice
        dap = deepcopy(multi("preservation_service"))
        if "national_library" in dap:
            dap.remove("national_library")
        if "other" in dap:
            dap.remove("other")
            # only add the free-text service if there is one; appending None would break the join
            preservation_other = forminfo.get("preservation_service_other")
            if preservation_other:
                dap.append(preservation_other)
        if "none" in dap:
            dap.remove("none")
        add("preservation_service", ", ".join(dap))
        add("preservation_service_library", "; ".join(multi("preservation_service_library")))
        add("preservation_service_url", forminfo.get("preservation_service_url"))

        add("deposit_policy", other_list("deposit_policy", "deposit_policy_other", "other"))
        add("deposit_policy_url", forminfo.get("deposit_policy_url"))

        add("persistent_identifiers",
            other_list("persistent_identifiers", "persistent_identifiers_other", "other"))

        # Open Access Compliance (usually first on form)
        add("boai", yes_or_blank(forminfo.get("boai")))

        add("continues", ", ".join(multi("continues")))
        add("continued_by", ", ".join(multi("continued_by")))

        add("subject", "|".join(multi("subject")))
        add("s2o", "Yes" if forminfo.get("s2o", False) else "No")
        add("mirror", "Yes" if forminfo.get("mirror", False) else "No")
        add("ojc", "Yes" if forminfo.get("ojc", False) else "No")

        return kvs

    @classmethod
    def question2form(cls, journal, questions):
        """ Create a Journal (update) form from the CSV questions
        :param journal - A journal object to base the form on
        :param questions - a dict of header and value as provided by csv.DictReader
            {'header': 'answer', 'header2': 'answer2'...}

        :return: A MultiDict of updates to the form for validation and a formatted string of what was changed
        :raises QuestionTransformError: if an answer cannot be converted back to its form value
        """

        def _y_or_blank(x):
            """ Some of the work of undoing yes_or_blank() """
            return 'y' if x == 'Yes' else ''

        def _y_n_or_blank(x):
            """ Undoing yes_no_or_blank() to 'y' or 'n' ONLY """
            if x == 'Yes':
                return 'y'
            return 'n' if x == 'No' else ""

        def _comma_to_list(x):
            """ Comma separated string to list of stripped items """
            return [item.strip() for item in x.split(',')]

        def _lang_codes(x):
            """ Get the uppercase 2-char language string for each comma separated language name"""
            langs = [datasets.language_for(name) for name in _comma_to_list(x)]
            return [lang.alpha_2.upper() for lang in langs if lang is not None]

        def _unfurl_apc(x):
            """ Allow an APC update by splitting the APC string from the spreadsheet """
            apcs = []
            for apc in x.split('; '):
                # anything other than "<amount> <currency>" raises ValueError, handled in csv2formval
                amt, cur = apc.split()
                apcs.append({'apc_currency': cur, 'apc_max': round(float(amt))})
            apcs.reverse()
            return apcs

        # Undo the transformations applied to specific fields. TODO: Add these as they are encountered in the wild
        REVERSE_TRANSFORM_MAP = {
            'keywords': _comma_to_list,
            'language': _lang_codes,
            'publisher_country': datasets.get_country_code,
            'institution_country': datasets.get_country_code,
            'license': _comma_to_list,
            'license_display': _y_or_blank,
            'copyright_author_retains': _y_n_or_blank,
            'review_process': _comma_to_list,
            'plagiarism_detection': _y_n_or_blank,
            'publication_time_weeks': lambda x: round(float(x)),
            'apc': _y_n_or_blank,
            'apc_charges': _unfurl_apc,
            'has_waiver': _y_n_or_blank,
            'has_other_charges': _y_n_or_blank,
            'deposit_policy': _comma_to_list,
            'preservation_service': _comma_to_list,
            'persistent_identifiers': _comma_to_list,
            'boai': _y_or_blank,
            's2o': _y_n_or_blank,
            'mirror': _y_n_or_blank,
            'ojc': _y_n_or_blank
        }

        def csv2formval(key, value):
            """ Convert a CSV display value back to the value stored on the form """
            # We keep getting naff data with trailing whitespace, so strip those out
            if isinstance(value, str):
                value = value.strip()

            # Apply the reverse transformation back from display value to storage value.
            try:
                return REVERSE_TRANSFORM_MAP[key](value)
            except KeyError:
                # This field doesn't appear in the map, return unchanged.
                # NOTE(review): a KeyError raised inside a transform also lands here
                return value
            except (ValueError, OverflowError) as e:
                # OverflowError covers numeric answers such as "inf", which round() cannot convert
                raise QuestionTransformError(key, value, message="Could not transform value") from e

        # start by converting the object to the forminfo version
        # ~~->JournalForm:Crosswalk~~
        forminfo = JournalFormXWalk.obj2form(journal)

        # Get the CSV output this journal currently produces so that we can skip over unchanged fields
        current_csv = dict(cls.journal2question(journal))

        # Collect the update report
        updates = []

        for k, v in questions.items():
            # To save us writing all of the reverse transforms, we can skip this question entirely if unchanged from current journal
            if k in current_csv and current_csv[k] == v:
                continue

            # Only deal with a question if there's a value - TODO: what does this mean for yes_no_or_blank?
            if not isinstance(v, str) or len(v.strip()) == 0:
                continue

            # Get the question key from the CSV column header
            form_key = cls.p(k)

            # Account for columns that might not correspond to the form
            if form_key is None:
                continue

            # Only update if the value is changed (apply the reverse transformation to get the form value back)
            current_val = forminfo.get(form_key)
            update_val = csv2formval(form_key, v)
            if str(current_val) != str(update_val):
                updates.append('Updating {0}, from "{1}" to "{2}"'.format(form_key, current_val, update_val))
                forminfo[form_key] = update_val

        return JournalFormXWalk.forminfo2multidict(forminfo), updates
class Journal2PublisherUploadQuestionsXwalk(Journal2QuestionXwalk):
    """
    Variant of the journal question crosswalk that additionally declares which
    form fields are listed in REQUIRED.
    """
    # NOTE: This change was put in originally because the test data had a changed header from the regular
    # journal csv. That has been reversed now, but I'm leaving this in as a reminder that we can
    # patch over the questions as we need in future.
    # QTUP = Journal2QuestionXwalk.QTUP
    # QTUP[[i for i, v in enumerate(QTUP) if v[0] == "other_charges_url"][0]] = ("other_charges_url", "Other fees information URL (only if answer is in Column J is 'Yes')")

    # Form field keys whose questions are reported by required_questions()
    REQUIRED = [
        "pissn",
        "eissn"
    ]

    @classmethod
    def required_questions(cls):
        """ List the questions (CSV headings) for the REQUIRED field keys, in QTUP order """
        return [question for field_key, question in cls.QTUP if field_key in cls.REQUIRED]