Coverage for portality / crosswalks / journal_questions.py: 92%

177 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-05 00:09 +0100

1from copy import deepcopy 

2from portality import datasets 

3from portality.crosswalks.journal_form import JournalFormXWalk 

4from portality.forms.application_forms import ApplicationFormFactory 

5 

6 

class JournalXwalkException(Exception):
    """Base class for the exceptions defined by the journal questions crosswalk."""

9 

class QuestionTransformError(JournalXwalkException):
    """
    Raised when a CSV answer cannot be converted back into its form value.

    :param key: the form field key whose value failed to convert
    :param value: the offending value
    :param message: the exception message
    """

    def __init__(self, key, value, message):
        super().__init__(message)
        # Kept on the instance so callers can report which field/value failed.
        self.key, self.value = key, value

15 

class Journal2QuestionXwalk(object):
    """
    ~~JournalQuestions:Crosswalk->Journal:Form~~

    Crosswalk between a journal (via its form representation) and the
    question/answer columns of the Journal CSV, in both directions.
    """

    # (form field key, CSV column heading) pairs.
    QTUP = [
        ("alternative_title", "Alternative title"),
        ("apc_charges", "APC amount"),
        ("apc_url", "APC information URL"),
        ("preservation_service", "Preservation Services"),
        ("preservation_service_library", "Preservation Service: national library"),
        ("preservation_service_url", "Preservation information URL"),
        ("copyright_author_retains", "Author holds copyright without restrictions"),
        ("copyright_url", "Copyright information URL"),
        ("publisher_country", "Country of publisher"),
        ("deposit_policy", "Deposit policy directory"),
        ("review_process", "Review process"),
        ("review_url", "Review process information URL"),
        ("pissn", "Journal ISSN (print version)"),
        ("eissn", "Journal EISSN (online version)"),
        ("continues", "Continues"),
        ("continued_by", "Continued By"),
        ("institution_name", "Other organisation"),
        ("keywords", "Keywords"),
        ("language", "Languages in which the journal accepts manuscripts"),
        ("license_attributes", "License attributes"),
        ("license_display", "Machine-readable CC licensing information embedded or displayed in articles"),
        ("boai", "Does the journal comply to DOAJ's definition of open access?"),
        ("license", "Journal license"),
        ("license_terms_url", "URL for license terms"),
        ("oa_start", "When did the journal start to publish all content using an open license?"),
        ("journal_url", "Journal URL"),
        ("aims_scope_url", "URL for journal's aims & scope"),
        ("editorial_board_url", "URL for the Editorial Board page"),
        ("author_instructions_url", "URL for journal's instructions for authors"),
        ("waiver_url", "Waiver policy information URL"),
        ("persistent_identifiers", "Persistent article identifiers"),
        ("plagiarism_detection", "Journal plagiarism screening policy"),
        ("publication_time_weeks", "Average number of weeks between article submission and publication"),
        ("publisher_name", "Publisher"),
        ("other_charges_url", "Other fees information URL"),
        ("title", "Journal title"),
        ("institution_country", "Country of other organisation"),
        ("apc", "APC"),
        ("has_other_charges", "Has other fees"),
        ("has_waiver", "Journal waiver policy (for developing country authors etc)"),
        ("deposit_policy_url", "URL for deposit policy"),
        ("subject", "LCC Codes"),
        ("s2o", "Subscribe to Open"),
        ("mirror", "Mirror Journal"),
        ("ojc", "Open Journals Collective"),
    ]

    # "Other" free-text fields that share the CSV column of their main field.
    DEGEN = {
        "preservation_service_other": "preservation_service",
        "deposit_policy_other": "deposit_policy",
        "review_process_other": "review_process",
        "persistent_identifiers_other": "persistent_identifiers"
    }

    @classmethod
    def q(cls, ident, val=None):
        """
        Get the CSV column heading for a form field key.

        :param ident: form field key; keys listed in DEGEN resolve to their main field
        :param val: unused; retained so existing callers keep working
        :return: the heading text, or None if the key is not in QTUP
        """
        ident = cls.DEGEN.get(ident, ident)
        return next((heading for key, heading in cls.QTUP if key == ident), None)

    @classmethod
    def p(cls, ident):
        """
        p is a backwards q - i.e. get the form field key from the CSV heading

        :param ident: CSV column heading, matched case-insensitively
        :return: the form field key, or None if there is no such heading
            (including when ident is not a string)
        """
        if not isinstance(ident, str):
            # Not a heading at all (e.g. None); there is nothing to look up.
            return None

        # No DEGEN remapping here: it would swap the heading for a "*_other"
        # field key, and no heading in QTUP is spelled like a field key.
        wanted = ident.lower()
        for key, heading in cls.QTUP:
            if heading.lower() == wanted:
                return key
        return None

    @classmethod
    def question_list(cls):
        """ Return the CSV column headings in QTUP order """
        return [heading for _, heading in cls.QTUP]

    @classmethod
    def journal2question(cls, journal):
        """
        Convert the journal data to key value pairs for use in the Journal CSV

        :param journal: the journal object, converted with JournalFormXWalk.obj2form
        :return: list of (column heading, answer) tuples, in the order they are built below
        """
        # start by converting the object to the forminfo version
        forminfo = JournalFormXWalk.obj2form(journal)

        truthy = (True, "True", "Yes", "true", "yes", "y")

        def has_value(val):
            # "None" as text is treated the same as a missing value
            return val is not None and val != "" and val != "None"

        def as_list(field):
            # A missing or None field is treated as an empty list so that join/iteration is safe
            return forminfo.get(field) or []

        def yes_no_or_blank(val):
            if val in truthy:
                return "Yes"
            return "No" if val is not None else ""

        def yes_or_blank(val):
            return "Yes" if val in truthy else ''

        def other_list(main_field, other_field, other_value):
            selected = forminfo.get(main_field, [])
            if not has_value(selected):
                selected = []
            # Work on a copy so the list held by forminfo is not modified
            selected = list(selected)

            # if the xwalk has returned a single-list element like ["none"]
            # we want to strip that "none" for the purpose of the CSV; the
            # marker for "other" is dropped in favour of the free-text value
            for marker in ("none", other_value):
                if marker in selected:
                    selected.remove(marker)

            other_text = forminfo.get(other_field)
            if has_value(other_text):
                selected.append(other_text)
            return ", ".join(selected)

        def license_checkbox(val):
            labels = dict(ApplicationFormFactory.choices_for("license_attributes"))
            return ", ".join(str(labels.get(v)) for v in val or [])

        def languages(vals):
            codes = {c.lower() for c, _ in datasets.language_options}
            names = {n.lower() for _, n in datasets.language_options}
            keep = []
            for v in vals:
                lowered = v.lower()
                if lowered in codes:
                    keep.append(datasets.name_for_lang(v))
                elif lowered in names:
                    keep.append(v)
            return ", ".join(keep)

        kvs = []

        def add(field, answer):
            kvs.append((cls.q(field), answer))

        # create key/value pairs for the questions in order
        # About
        add("title", forminfo.get("title"))
        add("journal_url", forminfo.get("journal_url"))
        add("oa_start", forminfo.get("oa_start"))
        add("alternative_title", forminfo.get("alternative_title"))
        add("pissn", forminfo.get("pissn"))
        add("eissn", forminfo.get("eissn"))
        add("keywords", ", ".join(as_list("keywords")))
        add("language", languages(as_list("language")))
        add("publisher_name", forminfo.get("publisher_name"))
        add("publisher_country", datasets.get_country_name(forminfo.get("publisher_country")))
        add("institution_name", forminfo.get("institution_name"))
        add("institution_country", datasets.get_country_name(forminfo.get("institution_country")))

        # Copyright & licensing
        add("license", ", ".join(as_list("license")))
        add("license_attributes", license_checkbox(as_list("license_attributes")))
        add("license_terms_url", forminfo.get("license_terms_url"))
        add("license_display", yes_or_blank(forminfo.get("license_display")))

        add("copyright_author_retains", yes_no_or_blank(forminfo.get("copyright_author_retains")))
        add("copyright_url", forminfo.get("copyright_url"))

        # Editorial
        add("review_process", other_list("review_process", "review_process_other", "other"))
        add("review_url", forminfo.get("review_url"))
        add("plagiarism_detection", yes_no_or_blank(forminfo.get("plagiarism_detection")))
        add("aims_scope_url", forminfo.get("aims_scope_url"))
        add("editorial_board_url", forminfo.get("editorial_board_url"))
        add("author_instructions_url", forminfo.get("author_instructions_url"))
        # NOTE(review): an unset value is written as the text "None" - confirm this is intended
        add("publication_time_weeks", str(forminfo.get("publication_time_weeks")))

        # Business Model
        add("apc", yes_no_or_blank(forminfo.get("apc")))
        add("apc_url", forminfo.get("apc_url"))
        apcs = [
            str(charge.get("apc_max")) + " " + charge.get("apc_currency")
            for charge in as_list("apc_charges")
        ]
        add("apc_charges", "; ".join(apcs))

        add("has_waiver", yes_no_or_blank(forminfo.get("has_waiver")))
        add("waiver_url", forminfo.get("waiver_url"))
        add("has_other_charges", yes_no_or_blank(forminfo.get("has_other_charges")))
        add("other_charges_url", forminfo.get("other_charges_url"))

        # Best Practice
        services = deepcopy(as_list("preservation_service"))
        if "national_library" in services:
            services.remove("national_library")
        if "other" in services:
            services.remove("other")
            other_service = forminfo.get("preservation_service_other")
            # Only add the free-text service when there is one; a None here would break the join
            if has_value(other_service):
                services.append(other_service)
        if "none" in services:
            services.remove("none")
        add("preservation_service", ", ".join(services))
        add("preservation_service_library", "; ".join(as_list("preservation_service_library")))
        add("preservation_service_url", forminfo.get("preservation_service_url"))

        add("deposit_policy", other_list("deposit_policy", "deposit_policy_other", "other"))
        add("deposit_policy_url", forminfo.get("deposit_policy_url"))

        add("persistent_identifiers",
            other_list("persistent_identifiers", "persistent_identifiers_other", "other"))

        # Open Access Compliance (usually first on form)
        add("boai", yes_or_blank(forminfo.get("boai")))

        add("continues", ", ".join(as_list("continues")))
        add("continued_by", ", ".join(as_list("continued_by")))

        add("subject", "|".join(as_list("subject")))
        add("s2o", "Yes" if forminfo.get("s2o", False) else "No")
        add("mirror", "Yes" if forminfo.get("mirror", False) else "No")
        add("ojc", "Yes" if forminfo.get("ojc", False) else "No")

        return kvs

    @classmethod
    def question2form(cls, journal, questions):
        """ Create a Journal (update) form from the CSV questions
        :param journal - A journal object to base the form on
        :param questions - a dict of header and value as provided by csv.DictReader
            {'header': 'answer', 'header2': 'answer2'...}

        :return: A MultiDict of updates to the form for validation and a list of strings describing what was changed
        :raises QuestionTransformError: if an answer cannot be converted back to its form value
        """

        def _y_or_blank(x):
            """ Some of the work of undoing yes_or_blank() """
            return 'y' if x == 'Yes' else ''

        def _y_n_or_blank(x):
            """ Undoing yes_no_or_blank() to 'y' or 'n' ONLY """
            if x == 'Yes':
                return 'y'
            return 'n' if x == 'No' else ""

        def _comma_to_list(x):
            """ Comma separated string to list of stripped items """
            return [item.strip() for item in x.split(',')]

        def _lang_codes(x):
            """ Get the uppercase 2-char language string for each comma separated language name"""
            found = (datasets.language_for(name) for name in _comma_to_list(x))
            return [lang.alpha_2.upper() for lang in found if lang is not None]

        def _unfurl_apc(x):
            """ Allow an APC update by splitting the APC string from the spreadsheet """
            apcs = []
            for apc in x.split('; '):
                # Unpacking raises ValueError unless the entry is exactly "<amount> <currency>"
                amt, cur = apc.split()
                apcs.append({'apc_currency': cur, 'apc_max': round(float(amt))})
            apcs.reverse()
            return apcs

        def _whole_number(x):
            """ Numeric string to the nearest integer """
            return round(float(x))

        # Undo the transformations applied to specific fields. TODO: Add these as they are encountered in the wild
        REVERSE_TRANSFORM_MAP = {
            'keywords': _comma_to_list,
            'language': _lang_codes,
            'publisher_country': datasets.get_country_code,
            'institution_country': datasets.get_country_code,
            'license': _comma_to_list,
            'license_display': _y_or_blank,
            'copyright_author_retains': _y_n_or_blank,
            'review_process': _comma_to_list,
            'plagiarism_detection': _y_n_or_blank,
            'publication_time_weeks': _whole_number,
            'apc': _y_n_or_blank,
            'apc_charges': _unfurl_apc,
            'has_waiver': _y_n_or_blank,
            'has_other_charges': _y_n_or_blank,
            'deposit_policy': _comma_to_list,
            'preservation_service': _comma_to_list,
            'persistent_identifiers': _comma_to_list,
            'boai': _y_or_blank,
            's2o': _y_n_or_blank,
            'mirror': _y_n_or_blank,
            'ojc': _y_n_or_blank
        }

        def csv2formval(key, value):
            # We keep getting naff data with trailing whitespace, so strip those out
            if isinstance(value, str):
                value = value.strip()

            # Apply the reverse transformation back from display value to storage value.
            try:
                return REVERSE_TRANSFORM_MAP[key](value)
            except KeyError:
                # This field doesn't appear in the map, return unchanged.
                # NOTE(review): a KeyError raised inside a transform lands here too - confirm that is acceptable
                return value
            except (ValueError, OverflowError) as err:
                # OverflowError covers round(float("inf")), which is not a ValueError
                raise QuestionTransformError(key, value, message="Could not transform value") from err

        # start by converting the object to the forminfo version
        # ~~->JournalForm:Crosswalk~~
        forminfo = JournalFormXWalk.obj2form(journal)

        # Get the CSV output this journal currently produces so that we can skip over unchanged fields
        current_csv = dict(cls.journal2question(journal))

        # Collect the update report
        updates = []

        for heading, answer in questions.items():
            # To save us writing all of the reverse transforms, we can skip this question entirely if unchanged from current journal
            if heading in current_csv and current_csv[heading] == answer:
                continue

            # Only deal with a question if there's a value - TODO: what does this mean for yes_no_or_blank?
            if not isinstance(answer, str) or len(answer.strip()) == 0:
                continue

            # Get the question key from the CSV column header
            form_key = cls.p(heading)

            # Account for columns that might not correspond to the form
            if form_key is None:
                continue

            # Only update if the value is changed (apply the reverse transformation to get the form value back)
            current_val = forminfo.get(form_key)
            update_val = csv2formval(form_key, answer)
            if str(current_val) != str(update_val):
                updates.append('Updating {0}, from "{1}" to "{2}"'.format(form_key, current_val, update_val))
                forminfo[form_key] = update_val

        return JournalFormXWalk.forminfo2multidict(forminfo), updates

338 

339 

class Journal2PublisherUploadQuestionsXwalk(Journal2QuestionXwalk):
    """ Question crosswalk variant that also names the questions listed in REQUIRED """

    # NOTE: This change was put in originally because the test data had a changed header from the regular
    # journal csv. That has been reversed now, but I'm leaving this in as a reminder that we can
    # patch over the questions as we need in future.
    # QTUP = Journal2QuestionXwalk.QTUP
    # QTUP[[i for i, v in enumerate(QTUP) if v[0] == "other_charges_url"][0]] = ("other_charges_url", "Other fees information URL (only if answer is in Column J is 'Yes')")

    # Form field keys whose questions are reported by required_questions()
    REQUIRED = [
        "pissn",
        "eissn"
    ]

    @classmethod
    def required_questions(cls):
        """ Return the CSV headings, in QTUP order, of the fields listed in REQUIRED """
        required = []
        for field_key, heading in cls.QTUP:
            if field_key in cls.REQUIRED:
                required.append(heading)
        return required

354 return [q for id, q in cls.QTUP if id in cls.REQUIRED]