Coverage for portality/crosswalks/journal_questions.py: 65%

176 statements  

« prev     ^ index     » next       coverage.py v6.4.2, created at 2022-07-19 18:38 +0100

1from copy import deepcopy 

2from portality import datasets 

3from portality.crosswalks.journal_form import JournalFormXWalk 

4from portality.forms.application_forms import ApplicationFormFactory 

5 

6 

class JournalXwalkException(Exception):
    """Error type for failures in the journal question crosswalk."""

9 

10 

class Journal2QuestionXwalk(object):
    """
    ~~JournalQuestions:Crosswalk->Journal:Form~~

    Crosswalk between journal form field keys and the question headings used
    as columns in the Journal CSV, in both directions.
    """
    # (form field key, CSV question heading) pairs
    QTUP = [
        ("alternative_title", "Alternative title"),
        ("apc_charges", "APC amount"),
        ("apc_url", "APC information URL"),
        ("preservation_service", "Preservation Services"),
        ("preservation_service_library", "Preservation Service: national library"),
        ("preservation_service_url", "Preservation information URL"),
        ("copyright_author_retains", "Author holds copyright without restrictions"),
        ("copyright_url", "Copyright information URL"),
        ("publisher_country", "Country of publisher"),
        ("deposit_policy", "Deposit policy directory"),
        ("review_process", "Review process"),
        ("review_url", "Review process information URL"),
        ("pissn", "Journal ISSN (print version)"),
        ("eissn", "Journal EISSN (online version)"),
        ("continues", "Continues"),
        ("continued_by", "Continued By"),
        ("institution_name", "Society or institution"),
        ("keywords", "Keywords"),
        ("language", "Languages in which the journal accepts manuscripts"),
        ("license_attributes", "License attributes"),
        ("license_display", "Machine-readable CC licensing information embedded or displayed in articles"),
        ("license_display_example_url", "URL to an example page with embedded licensing information"),
        ("boai", "Does the journal comply to DOAJ's definition of open access?"),
        ("license", "Journal license"),
        ("license_terms_url", "URL for license terms"),
        ("oa_statement_url", "URL for journal's Open Access statement"),
        ("oa_start", "When did the journal start to publish all content using an open license?"),
        ("journal_url", "Journal URL"),
        ("aims_scope_url", "URL for journal's aims & scope"),
        ("editorial_board_url", "URL for the Editorial Board page"),
        ("author_instructions_url", "URL for journal's instructions for authors"),
        ("waiver_url", "Waiver policy information URL"),
        ("persistent_identifiers", "Persistent article identifiers"),
        ("plagiarism_detection", "Journal plagiarism screening policy"),
        ("plagiarism_url", "Plagiarism information URL"),
        ("publication_time_weeks", "Average number of weeks between article submission and publication"),
        ("publisher_name", "Publisher"),
        ("other_charges_url", "Other fees information URL"),
        ("title", "Journal title"),
        ("institution_country", "Country of society or institution"),
        ("apc", "APC"),
        ("has_other_charges", "Has other fees"),
        ("has_waiver", "Journal waiver policy (for developing country authors etc)"),
        ("orcid_ids", "Article metadata includes ORCIDs"),
        ("open_citations", "Journal complies with I4OC standards for open citations"),
        ("deposit_policy_url", "URL for deposit policy"),
        ("subject", "LCC Codes"),
    ]

    # "<field>_other" keys share the question of their main field
    DEGEN = {
        "preservation_service_other": "preservation_service",
        "deposit_policy_other": "deposit_policy",
        "review_process_other": "review_process",
        "persistent_identifiers_other": "persistent_identifiers",
    }

    # Values rendered as "Yes" in the CSV. Kept as a tuple (not a set) so that
    # membership uses == exactly as before and unhashable values do not raise.
    _AFFIRMATIVE = (True, "True", "Yes", "true", "yes", "y")

    @classmethod
    def q(cls, ident):
        """ Get the question (CSV heading) for a form field key, or None if the key is unknown """
        ident = cls.DEGEN.get(ident, ident)
        for k, q in cls.QTUP:
            if k == ident:
                return q
        return None

    @classmethod
    def q2idx(cls, ident):
        """ Get the position of a form field key in QTUP, or -1 if the key is unknown """
        ident = cls.DEGEN.get(ident, ident)
        for i, (k, _) in enumerate(cls.QTUP):
            if k == ident:
                return i
        return -1

    @classmethod
    def p(cls, ident):
        """ p is a backwards q - i.e. get the form field key from the CSV heading (None if unknown) """
        # NOTE(review): this compares a CSV heading against DEGEN's values, which are
        # form keys rather than headings, so with the current tables the branch cannot
        # lead to a match. Behaviour is preserved as-is; confirm the intent.
        if ident in cls.DEGEN.values():
            for k, v in cls.DEGEN.items():
                if ident == v:
                    ident = k
        for k, q in cls.QTUP:
            if q == ident:
                return k
        return None

    @classmethod
    def question_list(cls):
        """ List every question heading, in QTUP order """
        return [q for _, q in cls.QTUP]

    @classmethod
    def _yes_no_or_blank(cls, val):
        """ "Yes" for affirmative values, "" for None, "No" for anything else """
        if val in cls._AFFIRMATIVE:
            return "Yes"
        return "No" if val is not None else ""

    @classmethod
    def _yes_or_blank(cls, val):
        """ "Yes" for affirmative values, "" for anything else """
        return "Yes" if val in cls._AFFIRMATIVE else ''

    @staticmethod
    def _other_list(forminfo, main_field, other_field, other_value):
        """
        Join a multi-choice field into a comma separated string, dropping the "none"
        and `other_value` placeholders and appending the free-text "other" answer.

        :param forminfo: the form data dict to read from (not modified)
        :param main_field: key of the multi-choice field
        :param other_field: key of the accompanying free-text field
        :param other_value: the choice value that stands for "other"
        :return: comma separated string of the resulting values
        """
        aids = forminfo.get(main_field, [])
        if aids is None or aids == "" or aids == "None":
            aids = []
        else:
            # work on a copy so the caller's forminfo is left untouched
            aids = list(aids)

        # if the xwalk has returned a placeholder entry like ["none"]
        # we want to strip that "none" for the purpose of the CSV
        if "none" in aids:
            aids.remove("none")

        aidother = forminfo.get(other_field)

        if other_value in aids:
            aids.remove(other_value)
        if aidother is not None and aidother != "" and aidother != "None":
            aids.append(aidother)
        return ", ".join(aids)

    @staticmethod
    def _license_checkbox(val):
        """ Convert license attribute values to their form labels, comma separated """
        opts = dict(ApplicationFormFactory.choices_for("license_attributes"))
        # A value with no matching choice is emitted as-is rather than becoming
        # None, which would make the join raise TypeError.
        return ", ".join(opts.get(v, v) for v in val)

    @staticmethod
    def _languages(vals):
        """ Convert language codes / names to a comma separated string of language names; unrecognised entries are dropped """
        keep = []
        codes = {c.lower() for c, _ in datasets.language_options}
        names = {n.lower() for _, n in datasets.language_options}
        for v in vals:
            lowered = v.lower()
            if lowered in codes:
                keep.append(datasets.name_for_lang(v))
            elif lowered in names:
                keep.append(v)
        return ", ".join(keep)

    @classmethod
    def journal2question(cls, journal):
        """
        Convert the journal data to key value pairs for use in the Journal CSV

        :param journal: the journal object, converted via JournalFormXWalk.obj2form
        :return: list of (question heading, value) tuples in CSV column order
        """
        # start by converting the object to the forminfo version
        forminfo = JournalFormXWalk.obj2form(journal)

        def seq(field):
            # list-valued fields may be absent or None; joining None raises TypeError
            return forminfo.get(field) or []

        kvs = []

        # create key/value pairs for the questions in order
        # About
        kvs.append((cls.q("title"), forminfo.get("title")))
        kvs.append((cls.q("journal_url"), forminfo.get("journal_url")))
        kvs.append((cls.q("oa_start"), forminfo.get("oa_start")))
        kvs.append((cls.q("alternative_title"), forminfo.get("alternative_title")))
        kvs.append((cls.q("pissn"), forminfo.get("pissn")))
        kvs.append((cls.q("eissn"), forminfo.get("eissn")))
        kvs.append((cls.q("keywords"), ", ".join(seq("keywords"))))
        kvs.append((cls.q("language"), cls._languages(seq("language"))))
        kvs.append((cls.q("publisher_name"), forminfo.get("publisher_name")))
        kvs.append((cls.q("publisher_country"), datasets.get_country_name(forminfo.get("publisher_country"))))
        kvs.append((cls.q("institution_name"), forminfo.get("institution_name")))
        kvs.append((cls.q("institution_country"), datasets.get_country_name(forminfo.get("institution_country"))))

        # Copyright & licensing
        kvs.append((cls.q("license"), ", ".join(seq("license"))))
        kvs.append((cls.q("license_attributes"), cls._license_checkbox(seq("license_attributes"))))
        kvs.append((cls.q("license_terms_url"), forminfo.get("license_terms_url")))
        kvs.append((cls.q("license_display"), cls._yes_or_blank(forminfo.get("license_display"))))
        kvs.append((cls.q("license_display_example_url"), forminfo.get("license_display_example_url")))

        kvs.append((cls.q("copyright_author_retains"), cls._yes_no_or_blank(forminfo.get("copyright_author_retains"))))
        kvs.append((cls.q("copyright_url"), forminfo.get("copyright_url")))

        # Editorial
        review_process = cls._other_list(forminfo, "review_process", "review_process_other", "other")
        kvs.append((cls.q("review_process"), review_process))
        kvs.append((cls.q("review_url"), forminfo.get("review_url")))
        kvs.append((cls.q("plagiarism_detection"), cls._yes_no_or_blank(forminfo.get("plagiarism_detection"))))
        kvs.append((cls.q("plagiarism_url"), forminfo.get("plagiarism_url")))
        kvs.append((cls.q("aims_scope_url"), forminfo.get("aims_scope_url")))
        kvs.append((cls.q("editorial_board_url"), forminfo.get("editorial_board_url")))
        kvs.append((cls.q("author_instructions_url"), forminfo.get("author_instructions_url")))
        # NOTE(review): a missing value is written as the string "None"; left unchanged
        # because question2form compares incoming cells against this exact output.
        kvs.append((cls.q("publication_time_weeks"), str(forminfo.get("publication_time_weeks"))))

        # Business Model
        kvs.append((cls.q("apc"), cls._yes_no_or_blank(forminfo.get("apc"))))
        kvs.append((cls.q("apc_url"), forminfo.get("apc_url")))
        apcs = [
            str(apc_charge.get("apc_max")) + " " + apc_charge.get("apc_currency")
            for apc_charge in seq("apc_charges")
        ]
        kvs.append((cls.q("apc_charges"), "; ".join(apcs)))

        kvs.append((cls.q("has_waiver"), cls._yes_no_or_blank(forminfo.get("has_waiver"))))
        kvs.append((cls.q("waiver_url"), forminfo.get("waiver_url")))
        kvs.append((cls.q("has_other_charges"), cls._yes_no_or_blank(forminfo.get("has_other_charges"))))
        kvs.append((cls.q("other_charges_url"), forminfo.get("other_charges_url")))

        # Best Practice
        dap = deepcopy(seq("preservation_service"))
        if "national_library" in dap:
            dap.remove("national_library")
        if "other" in dap:
            dap.remove("other")
            other_service = forminfo.get("preservation_service_other")
            # "other" may be ticked without a free-text value; a None entry would break the join
            if other_service is not None:
                dap.append(other_service)
        if "none" in dap:
            dap.remove("none")
        kvs.append((cls.q("preservation_service"), ", ".join(dap)))
        kvs.append((cls.q("preservation_service_library"), "; ".join(seq("preservation_service_library"))))
        kvs.append((cls.q("preservation_service_url"), forminfo.get("preservation_service_url")))

        deposit_policies = cls._other_list(forminfo, "deposit_policy", "deposit_policy_other", "other")
        kvs.append((cls.q("deposit_policy"), deposit_policies))
        kvs.append((cls.q("deposit_policy_url"), forminfo.get("deposit_policy_url")))

        article_identifiers = cls._other_list(forminfo, "persistent_identifiers", "persistent_identifiers_other", "other")
        kvs.append((cls.q("persistent_identifiers"), article_identifiers))
        kvs.append((cls.q("orcid_ids"), cls._yes_no_or_blank(forminfo.get("orcid_ids"))))
        kvs.append((cls.q("open_citations"), cls._yes_no_or_blank(forminfo.get("open_citations"))))

        # Open Access Compliance (usually first on form)
        kvs.append((cls.q("boai"), cls._yes_or_blank(forminfo.get("boai"))))
        kvs.append((cls.q("oa_statement_url"), forminfo.get("oa_statement_url")))

        kvs.append((cls.q("continues"), ", ".join(seq("continues"))))
        kvs.append((cls.q("continued_by"), ", ".join(seq("continued_by"))))

        kvs.append((cls.q("subject"), "|".join(seq("subject"))))

        return kvs

    @classmethod
    def question2form(cls, journal, questions):
        """ Create a Journal (update) form from the CSV questions
        :param journal - A journal object to base the form on
        :param questions - a dict of header and value as provided by csv.DictReader
            {'header': 'answer', 'header2': 'answer2'...}

        :return: A MultiDict of updates to the form for validation and a formatted string of what was changed
        """

        def _y_or_blank(x):
            """ Some of the work of undoing yes_or_blank() """
            return 'y' if x == 'Yes' else ''

        def _y_n_or_blank(x):
            """ Undoing yes_no_or_blank() to 'y' or 'n' ONLY """
            return 'y' if x == 'Yes' else 'n' if x != '' else None

        def _comma_to_list(x):
            """ Comma separated string to list of stripped items """
            return [item.strip() for item in x.split(',')]

        def _lang_codes(x):
            """ Get the uppercase 2-char language string for each comma separated language name"""
            langs = [datasets.language_for(name) for name in _comma_to_list(x)]
            return [lang.alpha_2.upper() for lang in langs if lang is not None]

        def _unfurl_apc(x):
            """ Allow an APC update by splitting the APC string from the spreadsheet """
            apcs = []
            for apc in x.split('; '):
                # each entry is "<amount> <currency>"; anything else raises ValueError
                [amt, cur] = apc.split()
                apcs.append({'apc_max': round(float(amt)), 'apc_currency': cur})
            return apcs

        # Undo the transformations applied to specific fields. TODO: Add these as they are encountered in the wild
        REVERSE_TRANSFORM_MAP = {
            'keywords': _comma_to_list,
            'language': _lang_codes,
            'publisher_country': datasets.get_country_code,
            'institution_country': datasets.get_country_code,
            'license': _comma_to_list,
            'license_display': _y_or_blank,
            'copyright_author_retains': _y_n_or_blank,
            'review_process': _comma_to_list,
            'plagiarism_detection': _y_n_or_blank,
            'publication_time_weeks': lambda x: round(float(x)),
            'apc': _y_or_blank,
            'apc_charges': _unfurl_apc,
            'has_waiver': _y_n_or_blank,
            'has_other_charges': _y_n_or_blank,
            'deposit_policy': _comma_to_list,
            'preservation_service': _comma_to_list,
            'persistent_identifiers': _comma_to_list,
            'orcid_ids': _y_n_or_blank,
            'open_citations': _y_n_or_blank,
            'boai': _y_or_blank,
        }

        def csv2formval(key, value):
            # We keep getting naff data with trailing whitespace, so strip those out
            if isinstance(value, str):
                value = value.strip()

            # Apply the reverse transformation back from display value to storage value.
            # Only the lookup decides "no transform": a KeyError raised inside a
            # transform must not be mistaken for a field missing from the map.
            transform = REVERSE_TRANSFORM_MAP.get(key)
            if transform is None:
                # This field doesn't appear in the map, return unchanged.
                return value
            return transform(value)

        # start by converting the object to the forminfo version
        # ~~->JournalForm:Crosswalk~~
        forminfo = JournalFormXWalk.obj2form(journal)

        # Get the CSV output this journal currently produces so that we can skip over unchanged fields
        current_csv = dict(cls.journal2question(journal))

        # Collect the update report
        updates = []

        for k, v in questions.items():
            # To save us writing all of the reverse transforms, we can skip this question entirely if unchanged from current journal
            if k in current_csv and current_csv[k] == v:
                continue

            # Only deal with a question if there's a value - TODO: what does this mean for yes_no_or_blank?
            # csv.DictReader gives None for cells missing from a short row and a list
            # (under the None key) for surplus cells, neither of which can be stripped.
            if not isinstance(v, str) or len(v.strip()) == 0:
                continue

            # Get the question key from the CSV column header
            form_key = cls.p(k)

            # Account for columns that might not correspond to the form
            if form_key is None:
                continue

            # Only update if the value is changed (apply the reverse transformation to get the form value back)
            current_val = forminfo.get(form_key)
            update_val = csv2formval(form_key, v)
            if current_val != update_val:
                updates.append('Updating {0}, from "{1}" to "{2}"'.format(form_key, current_val, update_val))
                forminfo[form_key] = update_val

        return JournalFormXWalk.forminfo2multidict(forminfo), updates

341 forminfo[form_key] = update_val 

342 

343 return JournalFormXWalk.forminfo2multidict(forminfo), updates