Coverage for portality/view/oaipmh.py: 90%

543 statements  

« prev     ^ index     » next       coverage.py v6.4.2, created at 2022-07-22 15:59 +0100

1import json, base64 

2from lxml import etree 

3from datetime import datetime, timedelta 

4from flask import Blueprint, request, make_response 

5from portality.core import app 

6from portality.models import OAIPMHJournal, OAIPMHArticle 

7from portality.lib import plausible 

8from portality.crosswalks.oaipmh import CROSSWALKS, make_set_spec, make_oai_identifier 

9 

blueprint = Blueprint('oaipmh', __name__)


#####################################################################
# Web API endpoints
#####################################################################

@blueprint.route("/oai", methods=["GET", "POST"])
@blueprint.route("/oai.<specified>", methods=["GET", "POST"])
def oaipmh(specified=None):
    """
    Entry point for every OAI-PMH request.

    A request to ``/oai`` is served from the journal DAO; a request to
    ``/oai.<specified>`` is served from the article DAO, and the lower-cased
    ``specified`` value is handed to the protocol operations as the endpoint
    name.  The ``verb`` request value (matched case-insensitively) selects
    the protocol operation; a missing or unknown verb yields a BadVerb
    response.

    :param specified: optional suffix from the URL (``/oai.<specified>``)
    :return: a flask response whose body is the serialised XML, with mimetype text/xml
    """
    # work out which endpoint we're going to, and label the analytics event accordingly
    if specified is None:
        dao = OAIPMHJournal()
        label = 'Journal'
    else:
        specified = specified.lower()
        dao = OAIPMHArticle()
        label = 'Article'
    event_payload = {'label': label}

    # Add the identifier to the event if there is one
    ident = request.values.get('identifier', None)
    if ident is not None:
        event_payload[app.config.get('GA_DIMENSIONS')['oai_res_id']] = ident

    # the verb is recorded as the event action, whether or not it is a legal one
    verb = request.values.get("verb")
    event_payload['action'] = verb

    # Now we have enough information about the request to send to analytics.
    # NOTE(review): the category key/default refer to OpenURL although this is
    # the OAI-PMH endpoint - confirm this is the intended analytics category.
    plausible.send_event(app.config.get('GA_CATEGORY_OPENURL', 'OpenURL'),
                         **event_payload)

    # call the appropriate protocol operation (None means no verb was supplied)
    base_url = request.base_url
    operation = None if verb is None else verb.lower()

    if operation == "identify":
        result = identify(dao, base_url)
    elif operation == "listmetadataformats":
        params = list_metadata_formats_params(request)
        result = list_metadata_formats(dao, base_url, specified, **params)
    elif operation == "getrecord":
        params = get_record_params(request)
        result = get_record(dao, base_url, specified, **params)
    elif operation == "listsets":
        params = list_sets_params(request)
        result = list_sets(dao, base_url, **params)
    elif operation == "listrecords":
        params = list_records_params(request)
        result = list_records(dao, base_url, specified, **params)
    elif operation == "listidentifiers":
        params = list_identifiers_params(request)
        result = list_identifiers(dao, base_url, specified, **params)
    else:
        # either no verb at all, or a verb we didn't understand
        result = BadVerb(base_url)

    # serialise and return
    resp = make_response(result.serialise())
    resp.mimetype = "text/xml"
    return resp

86 

87 

88##################################################################### 

89# Utility methods/objects 

90##################################################################### 

91 

class DateFormat(object):
    """
    Date handling shared by the OAI-PMH responses.

    Every timestamp produced here carries a literal ``Z`` suffix, i.e. it
    claims to be UTC, so the values fed into it must be UTC too.
    """

    @classmethod
    def granularity(cls):
        """Return the granularity string advertised in the Identify response."""
        return "YYYY-MM-DDThh:mm:ssZ"

    @classmethod
    def default_earliest(cls):
        """Return the fallback earliest datestamp (start of the unix epoch)."""
        return "1970-01-01T00:00:00Z"

    @classmethod
    def now(cls):
        """
        Return the current time as a ``YYYY-MM-DDThh:mm:ssZ`` string.

        The time is taken in UTC: the ``Z`` suffix asserts UTC, so formatting
        the server's local time (as was done previously) produced a wrong
        timestamp on any server not running in UTC.
        """
        # function-scope import: only this method needs timezone
        from datetime import timezone
        return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    @classmethod
    def format(cls, date):
        """
        Format a datetime as ``YYYY-MM-DDThh:mm:ssZ``.

        No timezone conversion is done; the caller is responsible for
        passing a UTC value.

        :param date: object with a ``strftime`` method (e.g. a datetime)
        :return: formatted string
        """
        return date.strftime("%Y-%m-%dT%H:%M:%SZ")

    @classmethod
    def legitimate_granularity(cls, datestr):
        """
        Check whether ``datestr`` is in one of the two accepted date formats
        (day granularity or second granularity).

        :param datestr: candidate date string
        :return: True if it parses in either format, False otherwise
            (including when it is not a string at all)
        """
        for pattern in ("%Y-%m-%d", "%Y-%m-%dT%H:%M:%SZ"):
            try:
                datetime.strptime(datestr, pattern)
            except (ValueError, TypeError):
                # ValueError: wrong format; TypeError: not a string
                continue
            return True
        return False

121 

122 

def decode_set_spec(setspec):
    """
    Decode a set spec, as received in a request, back to its plain text form.

    The set spec is url-safe base64 in which ``~`` stands in for the ``=``
    padding character.

    :param setspec: the encoded set spec string
    :return: the decoded unicode string
    :raises SetSpecException: if the value is not a string, is not valid
        base64, or does not decode to utf-8
    """
    try:
        # switch the ~ for = (restores the base64 padding), then decode
        padded = setspec.replace("~", "=")
        return base64.urlsafe_b64decode(padded).decode("utf-8")
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: setspec is not a string
        # ValueError: covers binascii.Error (bad base64) and UnicodeDecodeError
        raise SetSpecException()

142 

143 

def get_start_after(docs, current_start_after, list_size):
    """
    Work out the "start after" marker for the next page of results.

    The marker is the ``last_updated`` value of the final document in this
    page, together with how many documents in the page share that value.
    When the whole page shares that value and it is also the value the
    previous marker pointed at, the previous marker's count is added on.

    :param docs: non-empty sequence of objects supporting ``.get("last_updated")``
    :param current_start_after: the marker used to fetch this page
        (a ``(date, count)`` pair) or None for the first page
    :param list_size: the page size
    :return: tuple of ``(last_date, count)``
    """
    final_date = docs[-1].get("last_updated")
    same_date_total = sum(1 for entry in docs if entry.get("last_updated") == final_date)

    # If the current set of records have the same date as last record served previously,
    # the count has to be greater than the list of records and include the previous count
    continues_previous_run = (
        same_date_total == list_size
        and current_start_after is not None
        and final_date == current_start_after[0]
    )
    if continues_previous_run:
        same_date_total += current_start_after[1]

    return (final_date, same_date_total)

156 

157 

def make_resumption_token(metadata_prefix=None, from_date=None, until_date=None, oai_set=None, start_number=None, start_after=None):
    """
    Build a resumption token from the parameters of a list request.

    Each supplied (non-None) parameter is stored under a one-letter key in a
    JSON object, which is then url-safe base64 encoded.

    :param metadata_prefix: stored under "m"
    :param from_date: stored under "f"
    :param until_date: stored under "u"
    :param oai_set: stored under "s"
    :param start_number: stored under "n"
    :param start_after: stored under "a"
    :return: the token, as bytes
    """
    keyed_values = (
        ("m", metadata_prefix),
        ("f", from_date),
        ("u", until_date),
        ("s", oai_set),
        ("n", start_number),
        ("a", start_after),
    )
    # only parameters that were actually supplied go into the token
    payload = {key: value for key, value in keyed_values if value is not None}
    serialised = json.dumps(payload)
    return base64.urlsafe_b64encode(serialised.encode('utf-8'))

175 

176 

class ResumptionTokenException(Exception):
    """Raised when a resumption token cannot be decoded."""
    pass


class SetSpecException(Exception):
    """Raised when a set spec cannot be decoded."""
    pass


def decode_resumption_token(resumption_token):
    """
    Decode a resumption token into keyword arguments for a list operation.

    The token is expected to be url-safe base64 wrapping a JSON object with
    the one-letter keys m, f, u, s, n and a.  The token comes straight from
    the request, so anything that does not have that shape is rejected
    rather than allowed to fail later on.

    :param resumption_token: the token as received in the request
    :return: dict with any of the keys metadata_prefix, from_date,
        until_date, oai_set, start_number, start_after (a 2-tuple)
    :raises ResumptionTokenException: if the token is not valid base64, not
        valid utf-8 JSON, not a JSON object, or carries a malformed "a" value
    """
    # attempt to parse the resumption token out of base64 encoding and as a json object
    try:
        raw = base64.urlsafe_b64decode(str(resumption_token))
        d = json.loads(raw.decode("utf-8"))
    except (TypeError, ValueError):
        # ValueError covers binascii.Error (bad base64/padding),
        # UnicodeDecodeError and json decoding errors
        raise ResumptionTokenException()

    # valid JSON is not enough - it has to be an object to hold our keys
    if not isinstance(d, dict):
        raise ResumptionTokenException()

    # if we succeed read out the parameters
    params = {}
    simple_keys = (
        ("m", "metadata_prefix"),
        ("f", "from_date"),
        ("u", "until_date"),
        ("s", "oai_set"),
        ("n", "start_number"),
    )
    for short_key, param_name in simple_keys:
        if short_key in d:
            params[param_name] = d[short_key]

    if "a" in d:
        # the start-after marker is a (date, count) pair; both parts are indexed later
        start_after = d["a"]
        if not isinstance(start_after, (list, tuple)) or len(start_after) != 2:
            raise ResumptionTokenException()
        params["start_after"] = tuple(start_after)

    return params

205 

206 

def extract_internal_id(oai_identifier):
    """
    Pull the internal record id out of an OAI identifier.

    Only the part after the last colon is kept; an identifier with no colon
    is returned unchanged.

    :param oai_identifier: the identifier string from the request
    :return: the trailing segment of the identifier
    """
    # most of the identifier is for show - we only care about the string at the end
    _, _, internal_id = oai_identifier.rpartition(":")
    return internal_id

210 

211 

def get_response_date():
    """
    Return the timestamp to put in the responseDate element.

    :return: whatever DateFormat.now() produces
    """
    response_date = DateFormat.now()
    return response_date

215 

216 

def get_crosswalk(prefix, datatype):
    """
    Create the crosswalk registered in CROSSWALKS for a metadata prefix and data type.

    :param prefix: metadata prefix, the first-level key into CROSSWALKS
    :param datatype: data type, the second-level key
    :return: the result of calling the registered entry with no arguments
    """
    # NOTE(review): an unregistered prefix/datatype leaves factory as None,
    # so the call below fails with a TypeError
    registered_for_prefix = CROSSWALKS.get(prefix, {})
    factory = registered_for_prefix.get(datatype)
    return factory()

219 

220 

def list_metadata_formats_params(req):
    """
    Read the ListMetadataFormats arguments out of the request.

    :param req: request object exposing a ``values`` mapping
    :return: dict with the single key "identifier": the internal id
        extracted from the supplied identifier, or None if none was supplied
    """
    supplied = req.values.get("identifier")
    internal_id = None if supplied is None else extract_internal_id(supplied)
    return {"identifier" : internal_id}

226 

227 

def get_record_params(req):
    """
    Read the GetRecord arguments out of the request.

    :param req: request object exposing a ``values`` mapping
    :return: dict with "identifier" (the internal id extracted from the
        supplied identifier, or None) and "metadata_prefix" (or None)
    """
    args = req.values
    supplied = args.get("identifier")
    prefix = args.get("metadataPrefix")
    internal_id = extract_internal_id(supplied) if supplied is not None else None
    return {"identifier": internal_id, "metadata_prefix": prefix}

234 

235 

def list_sets_params(req):
    """
    Read the ListSets arguments out of the request.

    :param req: request object exposing a ``values`` mapping
    :return: dict with the single key "resumption_token" (None if not supplied)
    """
    return {"resumption_token" : req.values.get("resumptionToken")}

239 

240 

def list_records_params(req):
    """
    Read the ListRecords arguments out of the request.

    :param req: request object exposing a ``values`` mapping
    :return: dict of keyword arguments for list_records; any argument that
        was not supplied is None
    """
    args = req.values
    return {
        "from_date": args.get("from"),
        "until_date": args.get("until"),
        "oai_set": args.get("set"),
        "resumption_token": args.get("resumptionToken"),
        "metadata_prefix": args.get("metadataPrefix")
    }

254 

255 

def list_identifiers_params(req):
    """
    Read the ListIdentifiers arguments out of the request.

    :param req: request object exposing a ``values`` mapping
    :return: dict of keyword arguments for list_identifiers; any argument
        that was not supplied is None
    """
    args = req.values
    return {
        "from_date": args.get("from"),
        "until_date": args.get("until"),
        "oai_set": args.get("set"),
        "resumption_token": args.get("resumptionToken"),
        "metadata_prefix": args.get("metadataPrefix")
    }

269 

270##################################################################### 

271# OAI-PMH protocol operations implemented 

272##################################################################### 

273 

def get_record(dao, base_url, specified_oai_endpoint, identifier=None, metadata_prefix=None):
    """
    Implement the GetRecord verb.

    :param dao: data access object used to pull the record
    :param base_url: base url of the request, echoed in the response
    :param specified_oai_endpoint: key into the OAIPMH_METADATA_FORMATS config
    :param identifier: internal id of the record (required)
    :param metadata_prefix: requested metadata format (required)
    :return: a GetRecord response, or one of BadArgument,
        CannotDisseminateFormat or IdDoesNotExist
    """
    # both identifier and prefix are required
    if identifier is None or metadata_prefix is None:
        return BadArgument(base_url)

    # get the formats and check that we have formats that we can disseminate
    configured = app.config.get("OAIPMH_METADATA_FORMATS", {}).get(specified_oai_endpoint)
    if not configured:
        return CannotDisseminateFormat(base_url)

    # look for the first configured format matching the one we've been asked for
    matched = next((fmt for fmt in configured if fmt.get("metadataPrefix") == metadata_prefix), None)
    if matched is None:
        # we can't disseminate this format
        return CannotDisseminateFormat(base_url)

    # obtain the record from the dao
    record = dao.pull(identifier)
    if record is None:
        return IdDoesNotExist(base_url)

    # do the crosswalk
    xwalk = get_crosswalk(matched.get("metadataPrefix"), dao.__type__)
    metadata = xwalk.crosswalk(record)
    header = xwalk.header(record)

    # make the response
    response = GetRecord(base_url, make_oai_identifier(identifier, dao.__type__), metadata_prefix)
    response.metadata = metadata
    response.header = header
    return response

304 

305 

def identify(dao, base_url):
    """
    Implement the Identify verb.

    :param dao: data access object, asked for the earliest datestamp
    :param base_url: base url of the request, echoed in the response
    :return: an Identify response populated from the SERVICE_NAME and
        ADMIN_EMAIL config values and the dao's earliest datestamp
    """
    response = Identify(
        base_url,
        app.config.get("SERVICE_NAME"),
        app.config.get("ADMIN_EMAIL"),
    )
    response.earliest_datestamp = dao.earliest_datestamp()
    return response

312 

313 

def list_identifiers(dao, base_url, specified_oai_endpoint, metadata_prefix=None, from_date=None, until_date=None, oai_set=None, resumption_token=None):
    """
    Implement the ListIdentifiers verb.

    Without a resumption token this starts a new list from the supplied
    parameters.  With one, the list is resumed from the token, and supplying
    any other parameter alongside it is a BadArgument.

    :return: the response object produced by the initial or resumed listing,
        or BadArgument
    """
    if resumption_token is not None:
        # resumption of previous request: the token must be the only argument
        other_arguments = (metadata_prefix, from_date, until_date, oai_set)
        if any(argument is not None for argument in other_arguments):
            return BadArgument(base_url)
        return _resume_list_identifiers(dao, base_url, specified_oai_endpoint, resumption_token=resumption_token)

    # do an initial list identifiers
    return _parameterised_list_identifiers(
        dao, base_url, specified_oai_endpoint,
        metadata_prefix=metadata_prefix,
        from_date=from_date,
        until_date=until_date,
        oai_set=oai_set
    )

328 

329 

def _parameterised_list(identifiers_or_records, dao, base_url, specified_oai_endpoint, metadata_prefix=None, from_date=None, until_date=None, oai_set=None, start_number=0, start_after=None):
    """
    Shared implementation behind ListIdentifiers and ListRecords.

    :param identifiers_or_records: 'identifiers' to build a ListIdentifiers
        response; any other value builds a ListRecords response
    :param dao: data access object providing list_records and __type__
    :param base_url: base url of the request, echoed in the response
    :param specified_oai_endpoint: key into the OAIPMH_METADATA_FORMATS config
    :param metadata_prefix: requested metadata format (required)
    :param from_date: optional lower date bound
    :param until_date: optional upper date bound
    :param oai_set: optional encoded set spec
    :param start_number: how many records earlier pages have already served
    :param start_after: (date, count) marker from the previous page, or None
    :return: the list response, or one of BadArgument,
        CannotDisseminateFormat or NoRecordsMatch
    """
    # metadata prefix is required
    if metadata_prefix is None:
        return BadArgument(base_url)

    # get the formats and check that we have formats that we can disseminate
    formats = app.config.get("OAIPMH_METADATA_FORMATS", {}).get(specified_oai_endpoint)
    if not formats:
        return CannotDisseminateFormat(base_url)

    # check that the dates are formatted correctly
    for supplied_date in (from_date, until_date):
        if supplied_date is not None and not DateFormat.legitimate_granularity(supplied_date):
            return BadArgument(base_url)

    # get the result set size
    list_size = app.config.get("OAIPMH_LIST_IDENTIFIERS_PAGE_SIZE", 25)

    # decode the oai_set to something we can query with
    decoded_set = None
    if oai_set is not None:
        try:
            decoded_set = decode_set_spec(oai_set)
        except SetSpecException:
            return BadArgument(base_url)

    # only now look for the requested format, so that the argument checks
    # above take precedence over an unknown format
    fmt = next((f for f in formats if f.get("metadataPrefix") == metadata_prefix), None)
    if fmt is None:
        # this means we can't disseminate this format
        return CannotDisseminateFormat(base_url)

    # do the query
    total, results = dao.list_records(from_date, until_date, decoded_set, list_size, start_after)

    # if there are no results, PMH requires us to throw an error
    page_length = len(results)
    if page_length == 0:
        return NoRecordsMatch(base_url)

    # Get the full total.
    # Each search with a resumption token is a new search, so its total is
    # not the same as the first search's but is reduced by the number of
    # records already served.  full_total is the total as in the first search.
    if start_after is None:
        full_total = total
    else:
        full_total = total + start_number - start_after[1]

    # Determine where our next starting index will be
    new_start = start_number + page_length

    # Work out if we need a resumption token. It can have one of 3 values:
    # - None -> do not include the rt in the response if we have a full result set
    # - the empty string -> include in the response if this is the last set of results from an incomplete list
    # - some value -> include in the response if there are more values to retrieve
    if page_length == full_total:
        resumption_token = None
    elif new_start == full_total:
        resumption_token = ''
    else:
        resumption_token = make_resumption_token(
            metadata_prefix=metadata_prefix,
            from_date=from_date,
            until_date=until_date,
            oai_set=oai_set,
            start_number=new_start,
            start_after=get_start_after(results, start_after, list_size)
        )

    # Set up the response object for this request
    headers_only = identifiers_or_records == 'identifiers'
    response_type = ListIdentifiers if headers_only else ListRecords
    lst = response_type(base_url, from_date=from_date, until_date=until_date, oai_set=oai_set,
                        metadata_prefix=metadata_prefix)

    if resumption_token is not None:
        expiry = app.config.get("OAIPMH_RESUMPTION_TOKEN_EXPIRY", -1)
        lst.set_resumption(resumption_token, complete_list_size=full_total, cursor=new_start, expiry=expiry)

    for r in results:
        # do the crosswalk
        # NOTE(review): a crosswalk is created for every record; if crosswalks
        # hold no per-record state this could be created once before the loop
        xwalk = get_crosswalk(fmt.get("metadataPrefix"), dao.__type__)
        header = xwalk.header(r)

        if headers_only:
            # add to the response (header only)
            lst.add_record(header)
        else:
            # add to the response (metadata and header)
            lst.add_record(xwalk.crosswalk(r), header)

    return lst

431 

432 

def _parameterised_list_identifiers(dao, base_url, specified_oai_endpoint, metadata_prefix=None, from_date=None, until_date=None, oai_set=None, start_number=0, start_after=None):
    """
    Run the shared list implementation in 'identifiers' mode.

    All arguments are passed straight through to _parameterised_list.
    """
    return _parameterised_list(
        'identifiers', dao, base_url, specified_oai_endpoint,
        metadata_prefix=metadata_prefix,
        from_date=from_date,
        until_date=until_date,
        oai_set=oai_set,
        start_number=start_number,
        start_after=start_after
    )

435 

436 

def _resume_list_identifiers(dao, base_url, specified_oai_endpoint, resumption_token=None):
    """
    Continue a ListIdentifiers request from a resumption token.

    :return: BadResumptionToken if the token cannot be decoded, otherwise
        the result of the list operation run with the decoded parameters
    """
    try:
        decoded = decode_resumption_token(resumption_token)
    except ResumptionTokenException:
        return BadResumptionToken(base_url)
    else:
        return _parameterised_list_identifiers(dao, base_url, specified_oai_endpoint, **decoded)

443 

444 

def list_metadata_formats(dao, base_url, specified_oai_endpoint, identifier=None):
    """
    Implement the ListMetadataFormats verb.

    :param dao: data access object, used to check the identifier exists
    :param base_url: base url of the request, echoed in the response
    :param specified_oai_endpoint: key into the OAIPMH_METADATA_FORMATS config
    :param identifier: optional internal id the request is scoped to
    :return: a ListMetadataFormats response, or IdDoesNotExist / NoMetadataFormats
    """
    scoped_to_record = identifier is not None

    # if we are given an identifier, it has to be valid
    if scoped_to_record and not dao.identifier_exists(identifier):
        return IdDoesNotExist(base_url)

    # get the configured formats - there should always be some, but just in case
    # the service is mis-configured, this will throw the correct error
    formats = app.config.get("OAIPMH_METADATA_FORMATS", {}).get(specified_oai_endpoint)
    if not formats:
        return NoMetadataFormats(base_url)

    # create and return the list metadata formats response
    oai_id = make_oai_identifier(identifier, dao.__type__) if scoped_to_record else None
    response = ListMetadataFormats(base_url=base_url, identifier=oai_id)
    for fmt in formats:
        response.add_format(fmt.get("metadataPrefix"), fmt.get("schema"), fmt.get("metadataNamespace"))
    return response

465 

466 

def list_records(dao, base_url, specified_oai_endpoint, metadata_prefix=None, from_date=None, until_date=None, oai_set=None, resumption_token=None):
    """
    Implement the ListRecords verb.

    Without a resumption token this starts a new list from the supplied
    parameters.  With one, the list is resumed from the token, and supplying
    any other parameter alongside it is a BadArgument.

    :return: the response object produced by the initial or resumed listing,
        or BadArgument
    """
    if resumption_token is not None:
        # resumption of previous request: the token must be the only argument
        other_arguments = (metadata_prefix, from_date, until_date, oai_set)
        if any(argument is not None for argument in other_arguments):
            return BadArgument(base_url)
        return _resume_list_records(dao, base_url, specified_oai_endpoint, resumption_token=resumption_token)

    # do an initial list records
    return _parameterised_list_records(
        dao, base_url, specified_oai_endpoint,
        metadata_prefix=metadata_prefix,
        from_date=from_date,
        until_date=until_date,
        oai_set=oai_set
    )

478 

479 

def _parameterised_list_records(dao, base_url, specified_oai_endpoint, metadata_prefix=None, from_date=None, until_date=None, oai_set=None, start_number=0, start_after=None):
    """
    Run the shared list implementation in 'records' mode.

    All arguments are passed straight through to _parameterised_list.
    """
    return _parameterised_list(
        'records', dao, base_url, specified_oai_endpoint,
        metadata_prefix=metadata_prefix,
        from_date=from_date,
        until_date=until_date,
        oai_set=oai_set,
        start_number=start_number,
        start_after=start_after
    )

482 

483 

def _resume_list_records(dao, base_url, specified_oai_endpoint, resumption_token=None):
    """
    Continue a ListRecords request from a resumption token.

    :return: BadResumptionToken if the token cannot be decoded, otherwise
        the result of the list operation run with the decoded parameters
    """
    try:
        decoded = decode_resumption_token(resumption_token)
    except ResumptionTokenException:
        return BadResumptionToken(base_url)
    else:
        return _parameterised_list_records(dao, base_url, specified_oai_endpoint, **decoded)

490 

491 

def list_sets(dao, base_url, resumption_token=None):
    """
    Implement the ListSets verb.

    :param dao: data access object providing list_sets()
    :param base_url: base url of the request, echoed in the response
    :param resumption_token: not supported here; any value is rejected
    :return: a ListSets response, or BadResumptionToken
    """
    # This implementation does not support resumption tokens for this operation
    if resumption_token is not None:
        return BadResumptionToken(base_url)

    # ask the DAO for all the sets; each set's name is what it was listed as,
    # and its spec is derived from that same string
    response = ListSets(base_url)
    for set_name in dao.list_sets():
        response.add_set(make_set_spec(set_name), set_name)
    return response

504 

505 

506##################################################################### 

507# Objects 

508##################################################################### 

509 

class OAI_PMH(object):
    """
    Base class for every OAI-PMH response.

    It builds the envelope (root element, responseDate, request) and leaves
    the body to subclasses: get_element() must be overridden, and
    add_request_attributes() may be overridden to echo request arguments.
    """
    VERSION = "2.0"

    # namespace URIs, and the same in "{uri}" form for building qualified tag names
    PMH_NAMESPACE = "http://www.openarchives.org/OAI/2.0/"
    PMH = "{" + PMH_NAMESPACE + "}"

    XSI_NAMESPACE = "http://www.w3.org/2001/XMLSchema-instance"
    XSI = "{" + XSI_NAMESPACE + "}"

    NSMAP = {None : PMH_NAMESPACE, "xsi" : XSI_NAMESPACE}

    def __init__(self, base_url):
        """
        :param base_url: base url of the request, used as the text of the request element
        """
        self.base_url = base_url
        self.verb = None

    def _to_xml(self):
        """Build and return the complete OAI-PMH element tree for this response."""
        root = etree.Element(self.PMH + "OAI-PMH", nsmap=self.NSMAP)
        root.set(
            self.XSI + "schemaLocation",
            "http://www.openarchives.org/OAI/2.0/ "
            "http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"
        )

        date_element = etree.SubElement(root, self.PMH + "responseDate")
        date_element.text = get_response_date()

        # the request element echoes the verb (when there is one), the base
        # url, and whatever attributes the subclass adds
        request_element = etree.SubElement(root, self.PMH + "request")
        if self.verb is not None:
            request_element.set("verb", self.verb)
        request_element.text = self.base_url
        self.add_request_attributes(request_element)

        root.append(self.get_element())
        return root

    def serialise(self):
        """Return the response as UTF-8 encoded XML, with an XML declaration."""
        return etree.tostring(self._to_xml(), xml_declaration=True, encoding="UTF-8")

    def get_element(self):
        """Return the body element of the response; subclasses must override."""
        raise NotImplementedError()

    def add_request_attributes(self, element):
        """Hook for adding attributes to the request element; does nothing by default."""
        return

553 

554 

class GetRecord(OAI_PMH):
    """Response to the GetRecord verb: a single record (header plus metadata)."""

    def __init__(self, base_url, identifier, metadata_prefix):
        """
        :param base_url: base url of the request
        :param identifier: identifier echoed on the request element
        :param metadata_prefix: metadata prefix echoed on the request element
        """
        super(GetRecord, self).__init__(base_url)
        self.verb = "GetRecord"
        self.identifier = identifier
        self.metadata_prefix = metadata_prefix
        # both of these must be set to XML elements before serialising
        self.metadata = None
        self.header = None

    def get_element(self):
        """Return the GetRecord element wrapping one record: header first, then metadata."""
        container = etree.Element(self.PMH + "GetRecord", nsmap=self.NSMAP)
        record = etree.SubElement(container, self.PMH + "record")
        for part in (self.header, self.metadata):
            record.append(part)
        return container

    def add_request_attributes(self, element):
        """Echo the identifier and metadataPrefix arguments, where present."""
        echoed = (
            ("identifier", self.identifier),
            ("metadataPrefix", self.metadata_prefix),
        )
        for attribute, value in echoed:
            if value is not None:
                element.set(attribute, value)

578 

579 

class Identify(OAI_PMH):
    """Response to the Identify verb: a description of the repository."""

    def __init__(self, base_url, repo_name, admin_email):
        """
        :param base_url: base url of the request, also reported as baseURL
        :param repo_name: reported as repositoryName
        :param admin_email: reported as adminEmail
        """
        super(Identify, self).__init__(base_url)
        self.verb = "Identify"
        self.repo_name = repo_name
        self.admin_email = admin_email
        # set by the caller; when left as None a default is reported instead
        self.earliest_datestamp = None

    def get_element(self):
        """Return the Identify element describing the repository."""
        identify = etree.Element(self.PMH + "Identify", nsmap=self.NSMAP)

        repo_name = etree.SubElement(identify, self.PMH + "repositoryName")
        repo_name.text = self.repo_name

        base = etree.SubElement(identify, self.PMH + "baseURL")
        base.text = self.base_url

        protocol = etree.SubElement(identify, self.PMH + "protocolVersion")
        protocol.text = self.VERSION

        admin_email = etree.SubElement(identify, self.PMH + "adminEmail")
        admin_email.text = self.admin_email

        earliest = etree.SubElement(identify, self.PMH + "earliestDatestamp")
        if self.earliest_datestamp is not None:
            earliest.text = self.earliest_datestamp
        else:
            # fall back to the beginning of the unix epoch.  The default used
            # to be computed but never assigned, which left the element empty.
            earliest.text = DateFormat.default_earliest()

        deletes = etree.SubElement(identify, self.PMH + "deletedRecord")
        deletes.text = "transient"  # keep the door open

        granularity = etree.SubElement(identify, self.PMH + "granularity")
        granularity.text = DateFormat.granularity()

        return identify

618 

619 

class ListIdentifiers(OAI_PMH):
    """Response to the ListIdentifiers verb: a list of record headers."""

    def __init__(self, base_url, from_date=None, until_date=None, oai_set=None, metadata_prefix=None):
        """
        :param base_url: base url of the request
        :param from_date: "from" argument echoed on the request element
        :param until_date: "until" argument echoed on the request element
        :param oai_set: "set" argument echoed on the request element
        :param metadata_prefix: "metadataPrefix" argument echoed on the request element
        """
        super(ListIdentifiers, self).__init__(base_url)
        self.verb = "ListIdentifiers"
        self.from_date = from_date
        self.until_date = until_date
        self.oai_set = oai_set
        self.metadata_prefix = metadata_prefix
        self.records = []
        self.resumption = None

    def set_resumption(self, resumption_token, complete_list_size=None, cursor=None, expiry=-1):
        """
        Attach resumption information to the response.

        :param resumption_token: text of the resumptionToken element
            (may be the empty string)
        :param complete_list_size: optional completeListSize attribute value
        :param cursor: optional cursor attribute value
        :param expiry: token lifetime in seconds; a negative value means no
            expirationDate is reported
        """
        self.resumption = {"resumption_token" : resumption_token, "expiry" : expiry}
        if complete_list_size is not None:
            self.resumption["complete_list_size"] = complete_list_size
        if cursor is not None:
            self.resumption["cursor"] = cursor

    def add_record(self, header):
        """Add a record header element to the response."""
        self.records.append(header)

    def add_request_attributes(self, element):
        """Echo the from, until, set and metadataPrefix arguments, where present."""
        if self.from_date is not None:
            element.set("from", self.from_date)
        if self.until_date is not None:
            element.set("until", self.until_date)
        if self.oai_set is not None:
            element.set("set", self.oai_set)
        if self.metadata_prefix is not None:
            element.set("metadataPrefix", self.metadata_prefix)

    def get_element(self):
        """Return the ListIdentifiers element: the headers, then any resumptionToken."""
        # function-scope import: only the expiry calculation needs timezone
        from datetime import timezone

        lr = etree.Element(self.PMH + "ListIdentifiers", nsmap=self.NSMAP)

        for header in self.records:
            lr.append(header)

        if self.resumption is not None:
            rt = etree.SubElement(lr, self.PMH + "resumptionToken")
            if "complete_list_size" in self.resumption:
                rt.set("completeListSize", str(self.resumption.get("complete_list_size")))
            if "cursor" in self.resumption:
                rt.set("cursor", str(self.resumption.get("cursor")))

            # expirationDate is only emitted when a non-negative expiry is set
            # (None is treated the same as "no expiry")
            expiry = self.resumption.get("expiry", -1)
            if expiry is not None and expiry >= 0:
                # the formatted date carries a "Z" suffix, so it has to be
                # computed from UTC rather than from the server's local time
                expires_at = datetime.now(timezone.utc) + timedelta(0, expiry)
                rt.set("expirationDate", DateFormat.format(expires_at))
            rt.text = self.resumption.get("resumption_token")

        return lr

672 

673 

class ListMetadataFormats(OAI_PMH):
    """Response to the ListMetadataFormats verb."""

    def __init__(self, base_url, identifier=None):
        """
        :param base_url: base url of the request
        :param identifier: optional identifier echoed on the request element
        """
        super(ListMetadataFormats, self).__init__(base_url)
        self.verb = "ListMetadataFormats"
        self.identifier = identifier
        self.formats = []

    def add_format(self, metadata_prefix, schema, metadata_namespace):
        """Register one metadata format to be listed in the response."""
        entry = {
            "metadataPrefix": metadata_prefix,
            "schema": schema,
            "metadataNamespace": metadata_namespace
        }
        self.formats.append(entry)

    def add_request_attributes(self, element):
        """Echo the identifier argument, where present."""
        if self.identifier is None:
            return
        element.set("identifier", self.identifier)

    def get_element(self):
        """Return the ListMetadataFormats element with one metadataFormat child per format."""
        container = etree.Element(self.PMH + "ListMetadataFormats", nsmap=self.NSMAP)

        # child elements are named after the keys they are stored under
        child_names = ("metadataPrefix", "schema", "metadataNamespace")
        for fmt in self.formats:
            format_element = etree.SubElement(container, self.PMH + "metadataFormat")
            for child_name in child_names:
                child = etree.SubElement(format_element, self.PMH + child_name)
                child.text = fmt.get(child_name)

        return container

710 

711 

class ListRecords(OAI_PMH):
    """Response to the ListRecords verb: a list of records (header plus metadata)."""

    def __init__(self, base_url, from_date=None, until_date=None, oai_set=None, metadata_prefix=None):
        """
        :param base_url: base url of the request
        :param from_date: "from" argument echoed on the request element
        :param until_date: "until" argument echoed on the request element
        :param oai_set: "set" argument echoed on the request element
        :param metadata_prefix: "metadataPrefix" argument echoed on the request element
        """
        super(ListRecords, self).__init__(base_url)
        self.verb = "ListRecords"
        self.from_date = from_date
        self.until_date = until_date
        self.oai_set = oai_set
        self.metadata_prefix = metadata_prefix
        self.records = []
        self.resumption = None
        # not read anywhere in this class; kept so the attribute stays available
        self.resumption_expiry = -1

    def set_resumption(self, resumption_token, complete_list_size=None, cursor=None, expiry=-1):
        """
        Attach resumption information to the response.

        :param resumption_token: text of the resumptionToken element
            (may be the empty string)
        :param complete_list_size: optional completeListSize attribute value
        :param cursor: optional cursor attribute value
        :param expiry: token lifetime in seconds; a negative value means no
            expirationDate is reported
        """
        self.resumption = {"resumption_token" : resumption_token, "expiry" : expiry}
        if complete_list_size is not None:
            self.resumption["complete_list_size"] = complete_list_size
        if cursor is not None:
            self.resumption["cursor"] = cursor

    def add_record(self, metadata, header):
        """Add a record to the response, as its metadata and header elements."""
        self.records.append((metadata, header))

    def add_request_attributes(self, element):
        """Echo the from, until, set and metadataPrefix arguments, where present."""
        if self.from_date is not None:
            element.set("from", self.from_date)
        if self.until_date is not None:
            element.set("until", self.until_date)
        if self.oai_set is not None:
            element.set("set", self.oai_set)
        if self.metadata_prefix is not None:
            element.set("metadataPrefix", self.metadata_prefix)

    def get_element(self):
        """Return the ListRecords element: the records, then any resumptionToken."""
        # function-scope import: only the expiry calculation needs timezone
        from datetime import timezone

        lr = etree.Element(self.PMH + "ListRecords", nsmap=self.NSMAP)

        for metadata, header in self.records:
            # within a record the header comes first, then the metadata
            r = etree.SubElement(lr, self.PMH + "record")
            r.append(header)
            r.append(metadata)

        if self.resumption is not None:
            rt = etree.SubElement(lr, self.PMH + "resumptionToken")
            if "complete_list_size" in self.resumption:
                rt.set("completeListSize", str(self.resumption.get("complete_list_size")))
            if "cursor" in self.resumption:
                rt.set("cursor", str(self.resumption.get("cursor")))

            # expirationDate is only emitted when a non-negative expiry is set
            # (None is treated the same as "no expiry")
            expiry = self.resumption.get("expiry", -1)
            if expiry is not None and expiry >= 0:
                # the formatted date carries a "Z" suffix, so it has to be
                # computed from UTC rather than from the server's local time
                expires_at = datetime.now(timezone.utc) + timedelta(0, expiry)
                rt.set("expirationDate", DateFormat.format(expires_at))
            rt.text = self.resumption.get("resumption_token")

        return lr

767 

768 

class ListSets(OAI_PMH):
    """Response to the ListSets verb."""

    def __init__(self, base_url):
        """
        :param base_url: base url of the request
        """
        super(ListSets, self).__init__(base_url)
        self.verb = "ListSets"
        self.sets = []

    def add_set(self, spec, name):
        """Register a set, given its setSpec and setName values."""
        self.sets.append((spec, name))

    def get_element(self):
        """Return the ListSets element with one set child (setSpec, setName) per set."""
        container = etree.Element(self.PMH + "ListSets", nsmap=self.NSMAP)

        for spec, name in self.sets:
            set_element = etree.SubElement(container, self.PMH + "set")
            for tag, text in (("setSpec", spec), ("setName", name)):
                child = etree.SubElement(set_element, self.PMH + tag)
                child.text = text

        return container

789 

790 

791##################################################################### 

792# Error Handling 

793##################################################################### 

794 

class OAIPMHError(OAI_PMH):
    """
    Base class for OAI-PMH error responses.

    Subclasses set ``code`` and ``description``; either may be left as None,
    in which case it is omitted from the error element.
    """

    def __init__(self, base_url):
        """
        :param base_url: base url of the request
        """
        super(OAIPMHError, self).__init__(base_url)
        self.code = None
        self.description = None

    def get_element(self):
        """Return the error element: code as an attribute, description as its text."""
        element = etree.Element(self.PMH + "error", nsmap=self.NSMAP)
        code, description = self.code, self.description

        if code is not None:
            element.set("code", code)
        if description is not None:
            element.text = description

        return element

811 

812 

class BadArgument(OAIPMHError):
    """Error response with the code badArgument."""

    def __init__(self, base_url):
        super(BadArgument, self).__init__(base_url)
        self.description = (
            "The request includes illegal arguments, is missing required arguments, "
            "includes a repeated argument, or values for arguments have an illegal syntax."
        )
        self.code = "badArgument"

818 

819 

class BadResumptionToken(OAIPMHError):
    """Error response with the code badResumptionToken."""

    def __init__(self, base_url):
        super(BadResumptionToken, self).__init__(base_url)
        self.description = (
            "The value of the resumptionToken argument "
            "is invalid or expired."
        )
        self.code = "badResumptionToken"

825 

826 

class BadVerb(OAIPMHError):
    """Error response with the code badVerb."""

    def __init__(self, base_url):
        super(BadVerb, self).__init__(base_url)
        self.description = (
            "Value of the verb argument is not a legal OAI-PMH verb, "
            "the verb argument is missing, or the verb argument is repeated."
        )
        self.code = "badVerb"

832 

833 

class CannotDisseminateFormat(OAIPMHError):
    """Error response with the code cannotDisseminateFormat."""

    def __init__(self, base_url):
        super(CannotDisseminateFormat, self).__init__(base_url)
        self.description = (
            "The metadata format identified by the value given for the metadataPrefix "
            "argument is not supported by the item or by the repository."
        )
        self.code = "cannotDisseminateFormat"

839 

840 

class IdDoesNotExist(OAIPMHError):
    """Error response with the code idDoesNotExist."""

    def __init__(self, base_url):
        super(IdDoesNotExist, self).__init__(base_url)
        self.description = (
            "The value of the identifier argument is unknown "
            "or illegal in this repository."
        )
        self.code = "idDoesNotExist"

846 

847 

class NoRecordsMatch(OAIPMHError):
    """Error response with the code noRecordsMatch."""

    def __init__(self, base_url):
        super(NoRecordsMatch, self).__init__(base_url)
        self.description = (
            "The combination of the values of the from, until, set and "
            "metadataPrefix arguments results in an empty list."
        )
        self.code = "noRecordsMatch"

853 

854 

class NoMetadataFormats(OAIPMHError):
    """Error response with the code noMetadataFormats."""

    def __init__(self, base_url):
        super(NoMetadataFormats, self).__init__(base_url)
        self.description = (
            "There are no metadata formats available "
            "for the specified item."
        )
        self.code = "noMetadataFormats"

860 

861 

class NoSetHierarchy(OAIPMHError):
    """Error response with the code noSetHierarchy."""

    def __init__(self, base_url):
        super(NoSetHierarchy, self).__init__(base_url)
        self.description = "The repository does not support " "sets."
        self.code = "noSetHierarchy"