Coverage for portality / view / oaipmh.py: 89%

576 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-04 09:41 +0100

1import binascii 

2import json, base64 

3 

4from lxml import etree 

5from datetime import datetime, timedelta 

6from flask import Blueprint, request, make_response 

7from flask_login import current_user 

8from portality.core import app 

9from portality.lib.dates import FMT_DATETIME_STD, DEFAULT_TIMESTAMP_VAL, FMT_DATE_STD 

10from portality.models import OAIPMHJournal, OAIPMHArticle 

11from portality.lib import plausible, dates 

12from portality.crosswalks.oaipmh import CROSSWALKS, make_set_spec, make_oai_identifier 

13from portality import constants 

14 

# Flask blueprint on which the OAI-PMH routes defined in this module are registered.
blueprint = Blueprint('oaipmh', __name__)

16 

17 

18##################################################################### 

19# Web API endpoints 

20##################################################################### 

21 

@blueprint.route("/oai", methods=["GET", "POST"])
@blueprint.route("/oai.<specified>", methods=["GET", "POST"])
def oaipmh(specified=None):
    """Handle an OAI-PMH request and return the serialised XML response.

    With no ``specified`` suffix the request is answered from ``OAIPMHJournal``;
    with any suffix it is answered from ``OAIPMHArticle`` and the lower-cased
    suffix is passed on to the protocol operations as the endpoint name.

    :param specified: optional suffix taken from the ``/oai.<specified>`` route
    :return: a Flask response with mimetype ``text/xml``
    """
    # choose the data source and the analytics label for this endpoint
    if specified is None:
        dao, label = OAIPMHJournal(), 'Journal'
    else:
        specified = specified.lower()
        dao, label = OAIPMHArticle(), 'Article'

    # build up the analytics event: label, optional identifier dimension, then the verb
    event_payload = {'label': label}
    ident = request.values.get('identifier', None)
    if ident is not None:
        event_payload[app.config.get('ANALYTICS_DIMENSIONS')['oai_res_id']] = ident

    verb = request.values.get("verb")
    event_payload['action'] = verb

    # the event is sent before dispatch, so unknown/missing verbs are recorded too
    plausible.send_event(app.config.get('ANALYTICS_CATEGORY_OAI', 'OAI-PMH'),
                         **event_payload)

    # verbs are matched case-insensitively; a missing verb falls through to BadVerb
    operation = None if verb is None else verb.lower()
    base_url = request.base_url

    if operation == "identify":
        result = identify(dao, base_url)
    elif operation == "listmetadataformats":
        result = list_metadata_formats(dao, base_url, specified, **list_metadata_formats_params(request))
    elif operation == "getrecord":
        result = get_record(dao, base_url, specified, **get_record_params(request))
    elif operation == "listsets":
        result = list_sets(dao, base_url, **list_sets_params(request))
    elif operation == "listrecords":
        result = list_records(dao, base_url, specified, **list_records_params(request))
    elif operation == "listidentifiers":
        result = list_identifiers(dao, base_url, specified, **list_identifiers_params(request))
    else:
        # either no verb at all, or one we do not recognise
        result = BadVerb(base_url)

    # serialise and return
    resp = make_response(result.serialise())
    resp.mimetype = "text/xml"
    return resp

91 

92 

93##################################################################### 

94# Utility methods/objects 

95##################################################################### 

96 

class DateFormat(object):
    """Date helpers used when building and validating OAI-PMH requests/responses."""

    @classmethod
    def granularity(cls):
        """Return the granularity string advertised in the Identify response."""
        return "YYYY-MM-DDThh:mm:ssZ"

    @classmethod
    def default_earliest(cls):
        """Return the fallback earliest datestamp (``DEFAULT_TIMESTAMP_VAL``)."""
        return DEFAULT_TIMESTAMP_VAL

    @classmethod
    def now(cls):
        """Return the current time as produced by ``dates.now_str()``."""
        return dates.now_str()

    @classmethod
    def format(cls, date):
        """Format a datetime-like object using ``FMT_DATETIME_STD``."""
        return date.strftime(FMT_DATETIME_STD)

    @classmethod
    def legitimate_granularity(cls, datestr):
        """Report whether ``datestr`` parses as one of the accepted date formats.

        The accepted formats are ``FMT_DATE_STD`` and ``FMT_DATETIME_STD``.

        :param datestr: the date string supplied by the client
        :return: True if the value parses with either format, False otherwise
        """
        for fmt in (FMT_DATE_STD, FMT_DATETIME_STD):
            try:
                datetime.strptime(datestr, fmt)
            except (ValueError, TypeError):
                # ValueError: the string does not match this format;
                # TypeError: the value is not a string at all
                continue
            return True
        return False

126 

127 

def decode_set_spec(setspec):
    """Decode an encoded OAI set spec back to its text form.

    The set spec is expected to be URL-safe base64 with ``~`` standing in for
    the ``=`` padding character.

    :param setspec: the encoded set spec string
    :return: the decoded set spec as a str
    :raises SetSpecException: if the value is not a string, is not valid
        base64, or does not decode to UTF-8 text
    """
    # anything other than a str cannot be a set spec we issued; report it the
    # same way as any other undecodable value rather than failing on .replace
    if not isinstance(setspec, str):
        raise SetSpecException()

    # switch the ~ for =
    padded = setspec.replace("~", "=")

    try:
        return base64.urlsafe_b64decode(padded).decode("utf-8")
    except (ValueError, TypeError):
        # binascii.Error and UnicodeDecodeError are both ValueError subclasses
        raise SetSpecException()

147 

148 

def get_start_after(docs, current_start_after, list_size):
    """Work out the ``(last_updated, count)`` marker for the next page.

    :param docs: the non-empty page of results just retrieved; each item must
        support ``.get("last_updated")``
    :param current_start_after: the marker used to fetch this page, or None
    :param list_size: the page size
    :return: a tuple of the last record's ``last_updated`` value and the number
        of records on this page sharing that value (plus the previous marker's
        count when the whole page carries the same date as the previous marker)
    """
    last_date = docs[-1].get("last_updated")
    same_date_count = sum(1 for doc in docs if doc.get("last_updated") == last_date)

    # a full page of records all sharing the previous marker's date: the count
    # has to carry forward what was already served for that date
    page_is_single_date = same_date_count == list_size
    if page_is_single_date and current_start_after is not None and last_date == current_start_after[0]:
        same_date_count += current_start_after[1]

    return (last_date, same_date_count)

161 

162 

def make_resumption_token(metadata_prefix=None, from_date=None, until_date=None, oai_set=None, start_number=None, start_after=None):
    """Encode the state of a list request as a resumption token.

    Each supplied (non-None) argument is stored under a one-letter key in a
    JSON object, which is then URL-safe base64 encoded.

    :return: the token as bytes
    """
    # (key, value) pairs in the order they are written into the token
    candidates = (
        ("m", metadata_prefix),
        ("f", from_date),
        ("u", until_date),
        ("s", oai_set),
        ("n", start_number),
        ("a", start_after),
    )
    payload = {key: value for key, value in candidates if value is not None}
    serialised = json.dumps(payload)
    return base64.urlsafe_b64encode(serialised.encode('utf-8'))

180 

181 

class ResumptionTokenException(Exception):
    """Raised when a resumption token cannot be decoded."""

184 

185 

class SetSpecException(Exception):
    """Raised when a set spec cannot be decoded."""

188 

189 

def decode_resumption_token(resumption_token, account=None):
    """Decode a resumption token into keyword arguments for a list operation.

    The token is URL-safe base64 wrapping a JSON object with one-letter keys
    (``m``, ``f``, ``u``, ``s``, ``n``, ``a``). After decoding, ``until_date``
    is passed through ``_premium_until_date`` so that the restriction is
    re-applied on every resumed request.

    :param resumption_token: the token supplied by the client
    :param account: optional account forwarded to ``_premium_until_date``
    :return: dict of parameters keyed by the list functions' argument names
    :raises ResumptionTokenException: if the token is not valid base64/JSON,
        is not a JSON object, or carries a malformed ``n`` or ``a`` value
    """
    # attempt to parse the resumption token out of base64 encoding and as a json object
    try:
        raw = base64.urlsafe_b64decode(str(resumption_token))
        d = json.loads(raw.decode("utf-8"))
    except (TypeError, binascii.Error, ValueError):
        raise ResumptionTokenException()

    # well-formed JSON that is not an object (e.g. a number or list) is still
    # a bad token; without this check the key lookups below fail with TypeError
    if not isinstance(d, dict):
        raise ResumptionTokenException()

    # if we succeed read out the parameters
    params = {}
    simple_keys = (
        ("m", "metadata_prefix"),
        ("f", "from_date"),
        ("u", "until_date"),
        ("s", "oai_set"),
        ("n", "start_number"),
    )
    for key, name in simple_keys:
        if key in d:
            params[name] = d[key]

    # the start number is used in arithmetic downstream, so it must be an integer
    if "start_number" in params:
        n = params["start_number"]
        if isinstance(n, bool) or not isinstance(n, int):
            raise ResumptionTokenException()

    # the start-after marker is a (date, count) pair, serialised as a 2-item list
    if "a" in d:
        marker = d["a"]
        if not isinstance(marker, (list, tuple)) or len(marker) != 2:
            raise ResumptionTokenException()
        params["start_after"] = tuple(marker)

    if "until_date" in params:
        params["until_date"] = _premium_until_date(params["until_date"], account)
    else:
        ud = _premium_until_date(None, account)
        if ud is not None:
            params["until_date"] = ud

    return params

215 

216 

def extract_internal_id(oai_identifier):
    """Return the part of an OAI identifier after its last ``:``.

    If the identifier contains no colon the whole string is returned.
    """
    # most of the identifier is for show - we only care about the final segment
    _, _, internal_id = oai_identifier.rpartition(":")
    return internal_id

220 

221 

def get_response_date():
    """Return the timestamp to place in the response's ``responseDate`` element."""
    response_date = DateFormat.now()
    return response_date

224 

225 

def get_crosswalk(prefix, datatype):
    """Instantiate the crosswalk registered for a metadata prefix and data type.

    :param prefix: the metadata prefix used as the first-level key in ``CROSSWALKS``
    :param datatype: the second-level key (callers pass ``dao.__type__``)
    :return: a new instance produced by calling the registered entry
    """
    by_datatype = CROSSWALKS.get(prefix, {})
    factory = by_datatype.get(datatype)
    # NOTE(review): if nothing is registered, factory is None and this call raises TypeError
    return factory()

228 

229 

def list_metadata_formats_params(req):
    """Extract the ListMetadataFormats arguments from a request.

    :param req: object exposing request parameters through ``req.values.get``
    :return: dict with the single key ``identifier`` (internal id, or None if absent)
    """
    raw_identifier = req.values.get("identifier")
    if raw_identifier is None:
        return {"identifier" : None}
    return {"identifier" : extract_internal_id(raw_identifier)}

235 

236 

def get_record_params(req):
    """Extract the GetRecord arguments from a request.

    :param req: object exposing request parameters through ``req.values.get``
    :return: dict with ``identifier`` (internal id, or None) and ``metadata_prefix``
    """
    params = {
        "identifier": req.values.get("identifier"),
        "metadata_prefix": req.values.get("metadataPrefix"),
    }
    # only the trailing segment of the OAI identifier is used for lookups
    if params["identifier"] is not None:
        params["identifier"] = extract_internal_id(params["identifier"])
    return params

243 

244 

def list_sets_params(req):
    """Extract the ListSets arguments from a request.

    :param req: object exposing request parameters through ``req.values.get``
    :return: dict with the single key ``resumption_token``
    """
    return {"resumption_token" : req.values.get("resumptionToken")}

248 

249 

def list_records_params(req):
    """Extract the ListRecords arguments from a request.

    The ``until`` value is passed through ``_premium_until_date`` before being
    returned, so the result may differ from what the client sent.

    :param req: object exposing request parameters through ``req.values.get``
    :return: dict of keyword arguments for ``list_records``
    """
    values = req.values
    params = {
        "from_date": values.get("from"),
        "until_date": values.get("until"),
        "oai_set": values.get("set"),
        "resumption_token": values.get("resumptionToken"),
        "metadata_prefix": values.get("metadataPrefix")
    }

    # now fix the until_date according to the premium user requirement
    params["until_date"] = _premium_until_date(params["until_date"])
    return params

267 

def _premium_until_date(until_date, account=None):
    """Apply the non-premium delay to a requested ``until`` date.

    The requested value is returned unchanged when ``PREMIUM_MODE`` is off, or
    when the account (the one given, else the logged-in user) has the
    ``ROLE_PREMIUM_OAI`` role. Otherwise a cut-off of "now minus the
    non-premium delay" is computed and:

    * a missing or unparseable ``until_date`` is replaced by the cut-off;
    * an ``until_date`` later than the cut-off is replaced by the cut-off;
    * an ``until_date`` at or before the cut-off is returned unchanged.

    The delay defaults to 2592000 seconds (30 days) and, during the phase-in
    period, is capped at the time elapsed since ``PREMIUM_PHASE_IN_START``.

    :param until_date: the requested until date string, or None
    :param account: optional account to check instead of the current user
    :return: the until date string to use for the query
    """
    if not app.config.get("PREMIUM_MODE", True):
        return until_date

    # fall back to the logged-in user when no account was supplied
    if account is None and current_user and not current_user.is_anonymous:
        account = current_user._get_current_object()

    if account is not None and account.has_role(constants.ROLE_PREMIUM_OAI):
        return until_date

    delay_seconds = app.config.get("NON_PREMIUM_DELAY_SECONDS", 2592000)

    # if we are in the phase-in period, cap the delay to the phase in date
    if app.config.get("PREMIUM_PHASE_IN", False):
        phase_in_start = app.config.get("PREMIUM_PHASE_IN_START")
        if phase_in_start is not None:
            elapsed_seconds = (dates.now() - phase_in_start).total_seconds()
            if elapsed_seconds < delay_seconds:
                delay_seconds = elapsed_seconds

    cutoff = dates.before_now(delay_seconds)

    if until_date is None:
        return dates.format(cutoff)

    try:
        requested = dates.parse(until_date)
    except ValueError:
        # an unparseable date is treated the same as no date at all
        return dates.format(cutoff)

    if requested and dates.is_after(requested, cutoff):
        return dates.format(cutoff)

    return until_date

310 

def list_identifiers_params(req):
    """Extract the ListIdentifiers arguments from a request.

    The ``until`` value is passed through ``_premium_until_date`` before being
    returned, so the result may differ from what the client sent.

    :param req: object exposing request parameters through ``req.values.get``
    :return: dict of keyword arguments for ``list_identifiers``
    """
    # request parameter name -> keyword argument name, in the order they are read
    names = (
        ("from", "from_date"),
        ("until", "until_date"),
        ("set", "oai_set"),
        ("resumptionToken", "resumption_token"),
        ("metadataPrefix", "metadata_prefix"),
    )
    params = {kwarg: req.values.get(http_name) for http_name, kwarg in names}

    # now fix the until_date according to the premium user requirement
    params["until_date"] = _premium_until_date(params["until_date"])
    return params

328 

329##################################################################### 

330# OAI-PMH protocol operations implemented 

331##################################################################### 

332 

def get_record(dao, base_url, specified_oai_endpoint, identifier=None, metadata_prefix=None):
    """Implement the GetRecord verb.

    :param dao: data access object providing ``pull`` and ``__type__``
    :param base_url: the request's base URL, echoed in the response
    :param specified_oai_endpoint: key into ``OAIPMH_METADATA_FORMATS`` config
    :param identifier: internal id of the record (required)
    :param metadata_prefix: requested metadata format (required)
    :return: a ``GetRecord`` response, or one of ``BadArgument``,
        ``CannotDisseminateFormat`` or ``IdDoesNotExist``
    """
    # both identifier and prefix are required
    if identifier is None or metadata_prefix is None:
        return BadArgument(base_url)

    # get the formats and check that we have formats that we can disseminate
    formats = app.config.get("OAIPMH_METADATA_FORMATS", {}).get(specified_oai_endpoint)
    if formats is None or len(formats) == 0:
        return CannotDisseminateFormat(base_url)

    # find the first configured format matching the requested prefix
    chosen = next((f for f in formats if f.get("metadataPrefix") == metadata_prefix), None)
    if chosen is None:
        return CannotDisseminateFormat(base_url)

    # obtain the record from the dao
    record = dao.pull(identifier)
    if record is None:
        return IdDoesNotExist(base_url)

    # do the crosswalk
    xwalk = get_crosswalk(chosen.get("metadataPrefix"), dao.__type__)
    header = xwalk.header(record)

    response = GetRecord(base_url, make_oai_identifier(identifier, dao.__type__), metadata_prefix)
    response.header = header

    # metadata is only attached for records that report being in DOAJ;
    # otherwise the response carries the header alone
    if record.is_in_doaj():
        response.metadata = xwalk.crosswalk(record)

    return response

366 

367 

def identify(dao, base_url):
    """Implement the Identify verb.

    :param dao: data access object providing ``earliest_datestamp()``
    :param base_url: the request's base URL, echoed in the response
    :return: an ``Identify`` response object
    """
    # OAI_ADMIN_EMAIL takes precedence; ADMIN_EMAIL is the fallback
    fallback_email = app.config.get("ADMIN_EMAIL")
    response = Identify(
        base_url,
        app.config.get("SERVICE_NAME"),
        app.config.get("OAI_ADMIN_EMAIL", fallback_email),
    )
    response.earliest_datestamp = dao.earliest_datestamp()
    return response

374 

375 

def list_identifiers(dao, base_url, specified_oai_endpoint, metadata_prefix=None, from_date=None, until_date=None, oai_set=None, resumption_token=None):
    """Implement the ListIdentifiers verb.

    Without a resumption token this starts a new list request. With one, the
    token carries the request state and the other arguments must be absent.

    :param dao: data access object used to run the query
    :param base_url: the request's base URL, echoed in the response
    :param specified_oai_endpoint: key into ``OAIPMH_METADATA_FORMATS`` config
    :return: a ``ListIdentifiers`` response or an OAI-PMH error object
    """
    if resumption_token is None:
        # do an initial list identifiers
        return _parameterised_list_identifiers(
            dao, base_url,
            specified_oai_endpoint, metadata_prefix=metadata_prefix, from_date=from_date,
            until_date=until_date, oai_set=oai_set
        )

    # resumption of previous request: the token is an exclusive argument.
    #
    # until_date is deliberately NOT part of this check. list_identifiers_params
    # always passes the until value through _premium_until_date, which can turn
    # an absent until into a concrete date, so by the time we get here we cannot
    # tell whether the client actually supplied one. Checking it would reject
    # every resumed request from a non-premium caller. The resumption token
    # overrides until_date anyway.
    if metadata_prefix is not None or from_date is not None or oai_set is not None:
        return BadArgument(base_url)
    return _resume_list_identifiers(dao, base_url, specified_oai_endpoint, resumption_token=resumption_token)

390 

391 

def _parameterised_list(identifiers_or_records, dao, base_url, specified_oai_endpoint, metadata_prefix=None, from_date=None, until_date=None, oai_set=None, start_number=0, start_after=None):
    """Shared implementation of ListIdentifiers and ListRecords.

    :param identifiers_or_records: ``'identifiers'`` for a header-only
        ``ListIdentifiers`` response; any other value gives ``ListRecords``
    :param dao: data access object providing ``list_records`` and ``__type__``
    :param base_url: the request's base URL, echoed in the response
    :param specified_oai_endpoint: key into ``OAIPMH_METADATA_FORMATS`` config
    :param metadata_prefix: requested metadata format (required)
    :param from_date: optional lower date bound
    :param until_date: optional upper date bound
    :param oai_set: optional encoded set spec
    :param start_number: number of records already served before this page
    :param start_after: ``(last_updated, count)`` marker from the previous page, or None
    :return: the list response, or ``BadArgument``, ``CannotDisseminateFormat``
        or ``NoRecordsMatch``
    """
    # metadata prefix is required
    if metadata_prefix is None:
        return BadArgument(base_url)

    # get the formats and check that we have formats that we can disseminate
    formats = app.config.get("OAIPMH_METADATA_FORMATS", {}).get(specified_oai_endpoint)
    if formats is None or len(formats) == 0:
        return CannotDisseminateFormat(base_url)

    # check that the dates are formatted correctly (an absent date is fine)
    from_ok = from_date is None or DateFormat.legitimate_granularity(from_date)
    until_ok = until_date is None or DateFormat.legitimate_granularity(until_date)
    if not (from_ok and until_ok):
        return BadArgument(base_url)

    # get the result set size
    list_size = app.config.get("OAIPMH_LIST_IDENTIFIERS_PAGE_SIZE", 25)

    # decode the oai_set to something we can query with
    decoded_set = None
    if oai_set is not None:
        try:
            decoded_set = decode_set_spec(oai_set)
        except SetSpecException:
            return BadArgument(base_url)

    # pick the first configured format matching the requested prefix
    chosen = next((f for f in formats if f.get("metadataPrefix") == metadata_prefix), None)
    if chosen is None:
        # this means we can't disseminate this format
        return CannotDisseminateFormat(base_url)

    # do the query
    total, results = dao.list_records(from_date, until_date, decoded_set, list_size, start_after)

    # if there are no results, PMH requires us to throw an error
    if len(results) == 0:
        return NoRecordsMatch(base_url)

    # Each search with a resumption token is a new search, so its total is
    # reduced by the number of records already served. full_total restores
    # the total as it was in the first search.
    if start_after is None:
        full_total = total
    else:
        full_total = total + start_number - start_after[1]

    # Determine where our next starting index will be
    new_start = start_number + len(results)

    # Work out if we need a resumption token. It can have one of 3 values:
    # - None -> everything fitted in this one response, no token element at all
    # - the empty string -> this is the last page of an incomplete list
    # - some value -> there are more values to retrieve
    if len(results) == full_total:
        resumption_token = None
    elif new_start == full_total:
        resumption_token = ''
    else:
        resumption_token = make_resumption_token(
            metadata_prefix=metadata_prefix,
            from_date=from_date,
            until_date=until_date,
            oai_set=oai_set,
            start_number=new_start,
            start_after=get_start_after(results, start_after, list_size),
        )

    # set up the response object for this request
    headers_only = identifiers_or_records == 'identifiers'
    response_class = ListIdentifiers if headers_only else ListRecords
    lst = response_class(base_url, from_date=from_date, until_date=until_date, oai_set=oai_set,
                         metadata_prefix=metadata_prefix)

    if resumption_token is not None:
        expiry = app.config.get("OAIPMH_RESUMPTION_TOKEN_EXPIRY", -1)
        lst.set_resumption(resumption_token, complete_list_size=full_total, cursor=new_start, expiry=expiry)

    prefix = chosen.get("metadataPrefix")
    for r in results:
        # do the crosswalk (a fresh crosswalk object per record)
        xwalk = get_crosswalk(prefix, dao.__type__)
        header = xwalk.header(r)

        if headers_only:
            # add to the response (header only)
            lst.add_record(header)
        else:
            # add to the response (metadata and header)
            lst.add_record(xwalk.crosswalk(r), header)

    return lst

485 

486 

def _parameterised_list_identifiers(dao, base_url, specified_oai_endpoint, metadata_prefix=None, from_date=None, until_date=None, oai_set=None, start_number=0, start_after=None):
    """Run ``_parameterised_list`` in header-only (``'identifiers'``) mode."""
    return _parameterised_list(
        'identifiers', dao, base_url, specified_oai_endpoint,
        metadata_prefix=metadata_prefix,
        from_date=from_date,
        until_date=until_date,
        oai_set=oai_set,
        start_number=start_number,
        start_after=start_after,
    )

489 

490 

def _resume_list_identifiers(dao, base_url, specified_oai_endpoint, resumption_token=None):
    """Continue a ListIdentifiers request from a resumption token.

    :return: the next page, or ``BadResumptionToken`` if the token cannot be decoded
    """
    try:
        params = decode_resumption_token(resumption_token)
    except ResumptionTokenException:
        return BadResumptionToken(base_url)
    else:
        # the decoded token supplies every list parameter
        return _parameterised_list_identifiers(dao, base_url, specified_oai_endpoint, **params)

497 

498 

def list_metadata_formats(dao, base_url, specified_oai_endpoint, identifier=None):
    """Implement the ListMetadataFormats verb.

    :param dao: data access object providing ``identifier_exists`` and ``__type__``
    :param base_url: the request's base URL, echoed in the response
    :param specified_oai_endpoint: key into ``OAIPMH_METADATA_FORMATS`` config
    :param identifier: optional internal id to restrict the request to
    :return: a ``ListMetadataFormats`` response, or ``IdDoesNotExist`` /
        ``NoMetadataFormats``
    """
    has_identifier = identifier is not None

    # if we are given an identifier, it has to be valid
    if has_identifier and not dao.identifier_exists(identifier):
        return IdDoesNotExist(base_url)

    # get the configured formats - there should always be some, but just in case
    # the service is mis-configured, this will throw the correct error
    formats = app.config.get("OAIPMH_METADATA_FORMATS", {}).get(specified_oai_endpoint)
    if formats is None or len(formats) == 0:
        return NoMetadataFormats(base_url)

    # create and return the list metadata formats response
    oai_id = make_oai_identifier(identifier, dao.__type__) if has_identifier else None
    response = ListMetadataFormats(base_url=base_url, identifier=oai_id)
    for fmt in formats:
        response.add_format(fmt.get("metadataPrefix"), fmt.get("schema"), fmt.get("metadataNamespace"))
    return response

519 

520 

def list_records(dao, base_url, specified_oai_endpoint, metadata_prefix=None, from_date=None, until_date=None, oai_set=None, resumption_token=None):
    """Implement the ListRecords verb.

    Without a resumption token this starts a new list request. With one, the
    token carries the request state and all other arguments are ignored.

    :param dao: data access object used to run the query
    :param base_url: the request's base URL, echoed in the response
    :param specified_oai_endpoint: key into ``OAIPMH_METADATA_FORMATS`` config
    :return: a ``ListRecords`` response or an OAI-PMH error object
    """
    if resumption_token is not None:
        # Technically speaking, having other parameters alongside the token
        # should result in a BadArgument. That check interferes with the premium
        # metadata mechanism, though, so it is not applied here. The resumption
        # token overrides all other parameters anyway.
        return _resume_list_records(dao, base_url, specified_oai_endpoint, resumption_token=resumption_token)

    # do an initial list records
    return _parameterised_list_records(
        dao, base_url, specified_oai_endpoint,
        metadata_prefix=metadata_prefix,
        from_date=from_date,
        until_date=until_date,
        oai_set=oai_set,
    )

536 

537 

def _parameterised_list_records(dao, base_url, specified_oai_endpoint, metadata_prefix=None, from_date=None, until_date=None, oai_set=None, start_number=0, start_after=None):
    """Run ``_parameterised_list`` in full-record (``'records'``) mode."""
    return _parameterised_list(
        'records', dao, base_url, specified_oai_endpoint,
        metadata_prefix=metadata_prefix,
        from_date=from_date,
        until_date=until_date,
        oai_set=oai_set,
        start_number=start_number,
        start_after=start_after,
    )

540 

541 

def _resume_list_records(dao, base_url, specified_oai_endpoint, resumption_token=None):
    """Continue a ListRecords request from a resumption token.

    :return: the next page, or ``BadResumptionToken`` if the token cannot be decoded
    """
    try:
        params = decode_resumption_token(resumption_token)
    except ResumptionTokenException:
        return BadResumptionToken(base_url)
    else:
        # the decoded token supplies every list parameter
        return _parameterised_list_records(dao, base_url, specified_oai_endpoint, **params)

548 

549 

def list_sets(dao, base_url, resumption_token=None):
    """Implement the ListSets verb.

    :param dao: data access object providing ``list_sets()``
    :param base_url: the request's base URL, echoed in the response
    :param resumption_token: not supported for this verb; any value is an error
    :return: a ``ListSets`` response, or ``BadResumptionToken``
    """
    # This implementation does not support resumption tokens for this operation
    if resumption_token is not None:
        return BadResumptionToken(base_url)

    # ask the DAO for all the sets; each set's name is the raw value and its
    # spec is that same value run through make_set_spec
    response = ListSets(base_url)
    for set_name in dao.list_sets():
        response.add_set(make_set_spec(set_name), set_name)
    return response

562 

563 

564##################################################################### 

565# Objects 

566##################################################################### 

567 

class OAI_PMH(object):
    """Base class for every OAI-PMH response object.

    Subclasses set ``verb``, implement ``get_element`` to produce the body of
    the response, and may override ``add_request_attributes`` to echo request
    arguments onto the ``<request>`` element.
    """

    VERSION = "2.0"

    # namespace URIs, plus the same URIs in "{uri}" form for building tag names
    PMH_NAMESPACE = "http://www.openarchives.org/OAI/2.0/"
    PMH = "{" + PMH_NAMESPACE + "}"

    XSI_NAMESPACE = "http://www.w3.org/2001/XMLSchema-instance"
    XSI = "{" + XSI_NAMESPACE + "}"

    NSMAP = {None : PMH_NAMESPACE, "xsi" : XSI_NAMESPACE}

    def __init__(self, base_url):
        self.base_url = base_url
        self.verb = None

    def _to_xml(self):
        """Build the full ``<OAI-PMH>`` envelope around this response's element."""
        root = etree.Element(self.PMH + "OAI-PMH", nsmap=self.NSMAP)
        root.set(self.XSI + "schemaLocation",
                 "http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd")

        etree.SubElement(root, self.PMH + "responseDate").text = get_response_date()

        # the request element echoes the verb (when there is one), the base
        # url, and whatever arguments the subclass chooses to add
        request_el = etree.SubElement(root, self.PMH + "request")
        if self.verb is not None:
            request_el.set("verb", self.verb)
        request_el.text = self.base_url
        self.add_request_attributes(request_el)

        root.append(self.get_element())
        return root

    def serialise(self):
        """Return the response as UTF-8 encoded XML with an XML declaration."""
        return etree.tostring(self._to_xml(), xml_declaration=True, encoding="UTF-8")

    def get_element(self):
        """Return the verb-specific element; must be implemented by subclasses."""
        raise NotImplementedError()

    def add_request_attributes(self, element):
        """Hook for subclasses to add attributes to the request element; no-op here."""
        return

611 

612 

class GetRecord(OAI_PMH):
    """Response object for the GetRecord verb."""

    def __init__(self, base_url, identifier, metadata_prefix):
        super().__init__(base_url)
        self.verb = "GetRecord"
        self.identifier = identifier
        self.metadata_prefix = metadata_prefix
        # both are set by the caller after construction; metadata may stay None
        self.metadata = None
        self.header = None

    def get_element(self):
        """Build ``<GetRecord><record>`` holding the header and, if set, the metadata."""
        wrapper = etree.Element(self.PMH + "GetRecord", nsmap=self.NSMAP)
        record_el = etree.SubElement(wrapper, self.PMH + "record")

        parts = [self.header]
        if self.metadata is not None:
            parts.append(self.metadata)
        for part in parts:
            record_el.append(part)

        return wrapper

    def add_request_attributes(self, element):
        """Echo the identifier and metadataPrefix arguments onto the request element."""
        echoed = (("identifier", self.identifier), ("metadataPrefix", self.metadata_prefix))
        for attr_name, attr_value in echoed:
            if attr_value is not None:
                element.set(attr_name, attr_value)

637 

638 

class Identify(OAI_PMH):
    """Response object for the Identify verb."""

    def __init__(self, base_url, repo_name, admin_email):
        super(Identify, self).__init__(base_url)
        self.verb = "Identify"
        self.repo_name = repo_name
        self.admin_email = admin_email
        # set by the caller; when left as None a default datestamp is used
        self.earliest_datestamp = None

    def get_element(self):
        """Build the ``<Identify>`` element describing this repository.

        :return: the element, with repositoryName, baseURL, protocolVersion,
            adminEmail, earliestDatestamp, deletedRecord and granularity children
        """
        identify = etree.Element(self.PMH + "Identify", nsmap=self.NSMAP)

        repo_name = etree.SubElement(identify, self.PMH + "repositoryName")
        repo_name.text = self.repo_name

        base = etree.SubElement(identify, self.PMH + "baseURL")
        base.text = self.base_url

        protocol = etree.SubElement(identify, self.PMH + "protocolVersion")
        protocol.text = self.VERSION

        admin_email = etree.SubElement(identify, self.PMH + "adminEmail")
        admin_email.text = self.admin_email

        earliest = etree.SubElement(identify, self.PMH + "earliestDatestamp")
        if self.earliest_datestamp is not None:
            earliest.text = self.earliest_datestamp
        else:
            # no datestamp was supplied: fall back to the default so that the
            # element is never emitted empty
            earliest.text = DateFormat.default_earliest()

        deletes = etree.SubElement(identify, self.PMH + "deletedRecord")
        deletes.text = "transient"  # keep the door open

        granularity = etree.SubElement(identify, self.PMH + "granularity")
        granularity.text = DateFormat.granularity()

        return identify

677 

678 

class ListIdentifiers(OAI_PMH):
    """Response object for the ListIdentifiers verb (record headers only)."""

    def __init__(self, base_url, from_date=None, until_date=None, oai_set=None, metadata_prefix=None):
        super().__init__(base_url)
        self.verb = "ListIdentifiers"
        self.from_date = from_date
        self.until_date = until_date
        self.oai_set = oai_set
        self.metadata_prefix = metadata_prefix
        self.records = []
        self.resumption = None

    def set_resumption(self, resumption_token, complete_list_size=None, cursor=None, expiry=-1):
        """Record the resumption token and its optional attributes for serialisation."""
        self.resumption = {"resumption_token" : resumption_token, "expiry" : expiry}
        optional = (("complete_list_size", complete_list_size), ("cursor", cursor))
        for key, value in optional:
            if value is not None:
                self.resumption[key] = value

    def add_record(self, header):
        """Append a header element to the response."""
        self.records.append(header)

    def add_request_attributes(self, element):
        """Echo the supplied list arguments onto the request element."""
        echoed = (
            ("from", self.from_date),
            ("until", self.until_date),
            ("set", self.oai_set),
            ("metadataPrefix", self.metadata_prefix),
        )
        for attr_name, attr_value in echoed:
            if attr_value is not None:
                element.set(attr_name, attr_value)

    def get_element(self):
        """Build ``<ListIdentifiers>`` with the headers and, if set, a resumptionToken."""
        container = etree.Element(self.PMH + "ListIdentifiers", nsmap=self.NSMAP)

        for header in self.records:
            container.append(header)

        if self.resumption is None:
            return container

        token_el = etree.SubElement(container, self.PMH + "resumptionToken")
        for key, attr_name in (("complete_list_size", "completeListSize"), ("cursor", "cursor")):
            if key in self.resumption:
                token_el.set(attr_name, str(self.resumption.get(key)))

        # a negative expiry means no expirationDate attribute is written
        expiry = self.resumption.get("expiry", -1)
        if expiry >= 0:
            expires_at = dates.now() + timedelta(seconds=expiry)
            token_el.set("expirationDate", DateFormat.format(expires_at))

        token_el.text = self.resumption.get("resumption_token")
        return container

729 

730 

class ListMetadataFormats(OAI_PMH):
    """Response object for the ListMetadataFormats verb."""

    # keys of each stored format; also the child element names, in output order
    _FIELDS = ("metadataPrefix", "schema", "metadataNamespace")

    def __init__(self, base_url, identifier=None):
        super().__init__(base_url)
        self.verb = "ListMetadataFormats"
        self.identifier = identifier
        self.formats = []

    def add_format(self, metadata_prefix, schema, metadata_namespace):
        """Append one metadata format description to the response."""
        entry = dict(zip(self._FIELDS, (metadata_prefix, schema, metadata_namespace)))
        self.formats.append(entry)

    def add_request_attributes(self, element):
        """Echo the identifier argument, if any, onto the request element."""
        if self.identifier is None:
            return
        element.set("identifier", self.identifier)

    def get_element(self):
        """Build ``<ListMetadataFormats>`` with one ``<metadataFormat>`` per format."""
        container = etree.Element(self.PMH + "ListMetadataFormats", nsmap=self.NSMAP)

        for fmt in self.formats:
            format_el = etree.SubElement(container, self.PMH + "metadataFormat")
            for field in self._FIELDS:
                child = etree.SubElement(format_el, self.PMH + field)
                child.text = fmt.get(field)

        return container

767 

768 

class ListRecords(OAI_PMH):
    """Response object for the ListRecords verb (header plus metadata per record)."""

    def __init__(self, base_url, from_date=None, until_date=None, oai_set=None, metadata_prefix=None):
        super().__init__(base_url)
        self.verb = "ListRecords"
        self.from_date = from_date
        self.until_date = until_date
        self.oai_set = oai_set
        self.metadata_prefix = metadata_prefix
        self.records = []
        self.resumption = None
        self.resumption_expiry = -1

    def set_resumption(self, resumption_token, complete_list_size=None, cursor=None, expiry=-1):
        """Record the resumption token and its optional attributes for serialisation."""
        self.resumption = {"resumption_token" : resumption_token, "expiry" : expiry}
        optional = (("complete_list_size", complete_list_size), ("cursor", cursor))
        for key, value in optional:
            if value is not None:
                self.resumption[key] = value

    def add_record(self, metadata, header):
        """Append a ``(metadata, header)`` pair to the response."""
        self.records.append((metadata, header))

    def add_request_attributes(self, element):
        """Echo the supplied list arguments onto the request element."""
        echoed = (
            ("from", self.from_date),
            ("until", self.until_date),
            ("set", self.oai_set),
            ("metadataPrefix", self.metadata_prefix),
        )
        for attr_name, attr_value in echoed:
            if attr_value is not None:
                element.set(attr_name, attr_value)

    def get_element(self):
        """Build ``<ListRecords>`` with the records and, if set, a resumptionToken."""
        container = etree.Element(self.PMH + "ListRecords", nsmap=self.NSMAP)

        for metadata, header in self.records:
            record_el = etree.SubElement(container, self.PMH + "record")
            # header always comes first; metadata is optional
            record_el.append(header)
            if metadata is not None:
                record_el.append(metadata)

        if self.resumption is None:
            return container

        token_el = etree.SubElement(container, self.PMH + "resumptionToken")
        for key, attr_name in (("complete_list_size", "completeListSize"), ("cursor", "cursor")):
            if key in self.resumption:
                token_el.set(attr_name, str(self.resumption.get(key)))

        # a negative expiry means no expirationDate attribute is written
        expiry = self.resumption.get("expiry", -1)
        if expiry >= 0:
            expires_at = dates.now() + timedelta(seconds=expiry)
            token_el.set("expirationDate", DateFormat.format(expires_at))

        token_el.text = self.resumption.get("resumption_token")
        return container

823 

824 

class ListSets(OAI_PMH):
    """Response object for the ListSets verb."""

    def __init__(self, base_url):
        super().__init__(base_url)
        self.verb = "ListSets"
        self.sets = []

    def add_set(self, spec, name):
        """Append a ``(setSpec, setName)`` pair to the response."""
        self.sets.append((spec, name))

    def get_element(self):
        """Build ``<ListSets>`` with one ``<set>`` (setSpec + setName) per entry."""
        container = etree.Element(self.PMH + "ListSets", nsmap=self.NSMAP)

        for spec, name in self.sets:
            set_el = etree.SubElement(container, self.PMH + "set")
            etree.SubElement(set_el, self.PMH + "setSpec").text = spec
            etree.SubElement(set_el, self.PMH + "setName").text = name

        return container

845 

846 

847##################################################################### 

848# Error Handling 

849##################################################################### 

850 

class OAIPMHError(OAI_PMH):
    """Base class for OAI-PMH error responses.

    Subclasses set ``code`` and ``description``; ``verb`` is left as None, so
    the request element carries no verb attribute.
    """

    def __init__(self, base_url):
        super().__init__(base_url)
        self.code = None
        self.description = None

    def get_element(self):
        """Build the ``<error>`` element with its code attribute and description text."""
        error_el = etree.Element(self.PMH + "error", nsmap=self.NSMAP)

        code, description = self.code, self.description
        if code is not None:
            error_el.set("code", code)
        if description is not None:
            error_el.text = description

        return error_el

867 

868 

class BadArgument(OAIPMHError):
    """Error response with code ``badArgument``."""

    def __init__(self, base_url):
        super().__init__(base_url)
        self.code = "badArgument"
        self.description = "The request includes illegal arguments, is missing required arguments, includes a repeated argument, or values for arguments have an illegal syntax."

874 

875 

class BadResumptionToken(OAIPMHError):
    """Error response with code ``badResumptionToken``."""

    def __init__(self, base_url):
        super().__init__(base_url)
        self.code = "badResumptionToken"
        self.description = "The value of the resumptionToken argument is invalid or expired."

881 

882 

class BadVerb(OAIPMHError):
    """Error response with code ``badVerb``."""

    def __init__(self, base_url):
        super().__init__(base_url)
        self.code = "badVerb"
        self.description = "Value of the verb argument is not a legal OAI-PMH verb, the verb argument is missing, or the verb argument is repeated."

888 

889 

class CannotDisseminateFormat(OAIPMHError):
    """Error response with code ``cannotDisseminateFormat``."""

    def __init__(self, base_url):
        super().__init__(base_url)
        self.code = "cannotDisseminateFormat"
        self.description = "The metadata format identified by the value given for the metadataPrefix argument is not supported by the item or by the repository."

895 

896 

class IdDoesNotExist(OAIPMHError):
    """Error response with code ``idDoesNotExist``."""

    def __init__(self, base_url):
        super().__init__(base_url)
        self.code = "idDoesNotExist"
        self.description = "The value of the identifier argument is unknown or illegal in this repository."

902 

903 

class NoRecordsMatch(OAIPMHError):
    """Error response with code ``noRecordsMatch``."""

    def __init__(self, base_url):
        super().__init__(base_url)
        self.code = "noRecordsMatch"
        self.description = "The combination of the values of the from, until, set and metadataPrefix arguments results in an empty list."

909 

910 

class NoMetadataFormats(OAIPMHError):
    """Error response with code ``noMetadataFormats``."""

    def __init__(self, base_url):
        super().__init__(base_url)
        self.code = "noMetadataFormats"
        self.description = "There are no metadata formats available for the specified item."

916 

917 

class NoSetHierarchy(OAIPMHError):
    """Error response with code ``noSetHierarchy``."""

    def __init__(self, base_url):
        super().__init__(base_url)
        self.code = "noSetHierarchy"
        self.description = "The repository does not support sets."