Coverage for portality / view / oaipmh.py: 89%
576 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-05 00:09 +0100
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-05 00:09 +0100
1import binascii
2import json, base64
4from lxml import etree
5from datetime import datetime, timedelta
6from flask import Blueprint, request, make_response
7from flask_login import current_user
8from portality.core import app
9from portality.lib.dates import FMT_DATETIME_STD, DEFAULT_TIMESTAMP_VAL, FMT_DATE_STD
10from portality.models import OAIPMHJournal, OAIPMHArticle
11from portality.lib import plausible, dates
12from portality.crosswalks.oaipmh import CROSSWALKS, make_set_spec, make_oai_identifier
13from portality import constants
# Flask blueprint on which the OAI-PMH routes in this module are registered.
blueprint = Blueprint('oaipmh', __name__)
18#####################################################################
19# Web API endpoints
20#####################################################################
@blueprint.route("/oai", methods=["GET", "POST"])
@blueprint.route("/oai.<specified>", methods=["GET", "POST"])
def oaipmh(specified=None):
    """
    Entry point for all OAI-PMH requests.

    The bare ``/oai`` route is answered from the journal DAO, while
    ``/oai.<specified>`` is answered from the article DAO (with ``specified``
    lower-cased).  An analytics event is sent for every request, then the
    ``verb`` request value selects the protocol operation to run.

    :param specified: optional endpoint suffix taken from the URL
    :return: a ``text/xml`` response containing the serialised result
    """
    # Analytics payload; the action (verb) is filled in further down.
    event_payload = {}

    # pick the data source and analytics label for this endpoint
    if specified is None:
        dao = OAIPMHJournal()
        event_payload['label'] = 'Journal'
    else:
        specified = specified.lower()
        dao = OAIPMHArticle()
        event_payload['label'] = 'Article'

    # record the requested identifier, if there is one, under its analytics dimension
    ident = request.values.get('identifier', None)
    if ident is not None:
        dimension = app.config.get('ANALYTICS_DIMENSIONS')['oai_res_id']
        event_payload[dimension] = ident

    verb = request.values.get("verb")
    event_payload['action'] = verb

    # we now know enough about the request to report it
    plausible.send_event(app.config.get('ANALYTICS_CATEGORY_OAI', 'OAI-PMH'),
                         **event_payload)

    # verbs are matched case-insensitively; a missing verb matches nothing
    operation = None if verb is None else verb.lower()
    base_url = request.base_url

    if operation == "identify":
        result = identify(dao, base_url)
    elif operation == "listmetadataformats":
        params = list_metadata_formats_params(request)
        result = list_metadata_formats(dao, base_url, specified, **params)
    elif operation == "getrecord":
        params = get_record_params(request)
        result = get_record(dao, base_url, specified, **params)
    elif operation == "listsets":
        params = list_sets_params(request)
        result = list_sets(dao, base_url, **params)
    elif operation == "listrecords":
        params = list_records_params(request)
        result = list_records(dao, base_url, specified, **params)
    elif operation == "listidentifiers":
        params = list_identifiers_params(request)
        result = list_identifiers(dao, base_url, specified, **params)
    else:
        # no verb supplied, or a verb we didn't understand
        result = BadVerb(base_url)

    # serialise and return
    resp = make_response(result.serialise())
    resp.mimetype = "text/xml"
    return resp
93#####################################################################
94# Utility methods/objects
95#####################################################################
class DateFormat(object):
    """Date helpers used when building and validating OAI-PMH requests/responses."""

    @classmethod
    def granularity(cls):
        """Return the datestamp granularity string reported by Identify."""
        return "YYYY-MM-DDThh:mm:ssZ"

    @classmethod
    def default_earliest(cls):
        """Return the fallback earliest datestamp (``DEFAULT_TIMESTAMP_VAL``)."""
        return DEFAULT_TIMESTAMP_VAL

    @classmethod
    def now(cls):
        """Return the current time as a string, as produced by ``dates.now_str()``."""
        return dates.now_str()

    @classmethod
    def format(cls, date):
        """Format ``date`` using ``FMT_DATETIME_STD``."""
        return date.strftime(FMT_DATETIME_STD)

    @classmethod
    def legitimate_granularity(cls, datestr):
        """
        Check whether ``datestr`` parses under one of the accepted formats.

        :param datestr: date string supplied by the client
        :return: True if it matches ``FMT_DATE_STD`` or ``FMT_DATETIME_STD``, else False
        """
        for fmt in (FMT_DATE_STD, FMT_DATETIME_STD):
            try:
                datetime.strptime(datestr, fmt)
            except (TypeError, ValueError):
                # strptime raises ValueError on a format mismatch and TypeError
                # on a non-string; either way, try the next format
                continue
            return True
        return False
def decode_set_spec(setspec):
    """
    Decode an OAI set spec into the string it encodes.

    The spec is URL-safe base64 of UTF-8 text in which every ``=`` has been
    written as ``~``.

    :param setspec: the encoded set spec (a string)
    :return: the decoded string
    :raises SetSpecException: if the value is not valid base64 or not valid UTF-8
    """
    # switch the ~ for =
    padded = setspec.replace("~", "=")

    try:
        return base64.urlsafe_b64decode(padded).decode("utf-8")
    except (TypeError, ValueError) as err:
        # binascii.Error and UnicodeDecodeError are both ValueError subclasses
        raise SetSpecException() from err
def get_start_after(docs, current_start_after, list_size):
    """
    Work out the ``(last_updated, count)`` marker to carry into the next page.

    :param docs: the non-empty page of results just retrieved; each must support ``.get("last_updated")``
    :param current_start_after: the marker used to fetch this page, or None
    :param list_size: the page size
    :return: tuple of the last record's ``last_updated`` value and the number of
        records sharing that value
    """
    last_date = docs[-1].get("last_updated")
    same_date_count = sum(1 for doc in docs if doc.get("last_updated") == last_date)

    # When the whole page carries the same date as the previous marker, the
    # count must also include the records already served for that date.
    whole_page_same_date = same_date_count == list_size
    if whole_page_same_date and current_start_after is not None and last_date == current_start_after[0]:
        same_date_count += current_start_after[1]

    return (last_date, same_date_count)
def make_resumption_token(metadata_prefix=None, from_date=None, until_date=None, oai_set=None, start_number=None, start_after=None):
    """
    Pack the paging state into an opaque resumption token.

    Each argument that is not None is stored under a one-letter key
    (m, f, u, s, n, a); the resulting dict is JSON-encoded and then
    URL-safe base64 encoded.

    :return: the token as ``bytes``
    """
    fields = (
        ("m", metadata_prefix),
        ("f", from_date),
        ("u", until_date),
        ("s", oai_set),
        ("n", start_number),
        ("a", start_after),
    )
    payload = {key: value for key, value in fields if value is not None}
    serialised = json.dumps(payload)
    return base64.urlsafe_b64encode(serialised.encode('utf-8'))
class ResumptionTokenException(Exception):
    """Raised when a resumption token cannot be decoded."""
class SetSpecException(Exception):
    """Raised when a set spec cannot be decoded."""
def decode_resumption_token(resumption_token, account=None):
    """
    Unpack a resumption token into keyword arguments for the list operations.

    The token is URL-safe base64 around a JSON object with one-letter keys
    (m, f, u, s, n, a).  The resulting ``until_date`` is always passed through
    ``_premium_until_date`` so that the premium restriction is re-applied on
    every resumed request.

    :param resumption_token: the token supplied by the client
    :param account: optional account handed to ``_premium_until_date``
    :return: dict of parameters; only keys present in the token are included,
        plus ``until_date`` if ``_premium_until_date`` supplies one
    :raises ResumptionTokenException: if the token is not valid base64/JSON,
        is not a JSON object, or carries a malformed ``a`` value
    """
    # attempt to parse the resumption token out of base64 encoding and as a json object
    try:
        raw = base64.urlsafe_b64decode(str(resumption_token))
        d = json.loads(raw.decode("utf-8"))
    except (TypeError, binascii.Error, ValueError):
        raise ResumptionTokenException()

    # valid JSON that is not an object (e.g. a number, list or null) is still a bad token
    if not isinstance(d, dict):
        raise ResumptionTokenException()

    # if we succeed read out the parameters
    params = {}
    key_map = (
        ("m", "metadata_prefix"),
        ("f", "from_date"),
        ("u", "until_date"),
        ("s", "oai_set"),
        ("n", "start_number"),
    )
    for short, name in key_map:
        if short in d:
            params[name] = d.get(short)

    if "a" in d:
        try:
            params["start_after"] = tuple(d.get("a"))
        except TypeError:
            # e.g. null or a number where a [date, count] pair was expected
            raise ResumptionTokenException()

    if "until_date" in params:
        params["until_date"] = _premium_until_date(params["until_date"], account)
    else:
        ud = _premium_until_date(None, account)
        if ud is not None:
            params["until_date"] = ud

    return params
def extract_internal_id(oai_identifier):
    """
    Return the part of an OAI identifier after its last colon.

    Most of the identifier is for show; only the trailing segment is used to
    look the record up.  An identifier with no colon is returned unchanged.
    """
    _prefix, _sep, internal_id = oai_identifier.rpartition(":")
    return internal_id
def get_response_date():
    """Return the timestamp to place in the response's ``responseDate`` element."""
    response_date = DateFormat.now()
    return response_date
def get_crosswalk(prefix, datatype):
    """
    Instantiate the crosswalk registered for a metadata prefix and data type.

    :param prefix: metadata prefix used as the first-level key into ``CROSSWALKS``
    :param datatype: data type used as the second-level key
    :return: a new instance of the registered crosswalk
    """
    crosswalks_for_prefix = CROSSWALKS.get(prefix, {})
    factory = crosswalks_for_prefix.get(datatype)
    return factory()
def list_metadata_formats_params(req):
    """
    Read the ListMetadataFormats arguments from the request.

    :param req: request object exposing ``values.get``
    :return: dict with ``identifier`` reduced to its internal id, or None if absent
    """
    raw_identifier = req.values.get("identifier")
    internal_id = None if raw_identifier is None else extract_internal_id(raw_identifier)
    return {"identifier" : internal_id}
def get_record_params(req):
    """
    Read the GetRecord arguments from the request.

    :param req: request object exposing ``values.get``
    :return: dict with ``identifier`` (reduced to its internal id, or None)
        and ``metadata_prefix``
    """
    values = req.values
    raw_identifier = values.get("identifier")
    prefix = values.get("metadataPrefix")
    internal_id = None if raw_identifier is None else extract_internal_id(raw_identifier)
    return {"identifier": internal_id, "metadata_prefix": prefix}
def list_sets_params(req):
    """
    Read the ListSets arguments from the request.

    :param req: request object exposing ``values.get``
    :return: dict holding the ``resumption_token`` (None if not supplied)
    """
    return {"resumption_token" : req.values.get("resumptionToken")}
def list_records_params(req):
    """
    Read the ListRecords arguments from the request.

    The ``until`` value is passed through ``_premium_until_date`` so that the
    premium user requirement is applied before the operation runs.

    :param req: request object exposing ``values.get``
    :return: dict of keyword arguments for ``list_records``
    """
    values = req.values
    requested_until = values.get("until")
    return {
        "from_date": values.get("from"),
        "until_date": _premium_until_date(requested_until),
        "oai_set": values.get("set"),
        "resumption_token": values.get("resumptionToken"),
        "metadata_prefix": values.get("metadataPrefix")
    }
def _premium_until_date(until_date, account=None):
    """
    Apply the non-premium delay to a requested ``until`` date.

    Premium mode off, or an account holding ``ROLE_PREMIUM_OAI``: the requested
    value is returned untouched.  Otherwise a cut-off of "now minus the
    configured delay" (``NON_PREMIUM_DELAY_SECONDS``, default 2592000) is
    computed, and:

    - a missing or unparseable ``until_date`` becomes the cut-off
    - an ``until_date`` later than the cut-off becomes the cut-off
    - an ``until_date`` at or before the cut-off is left unchanged

    :param until_date: requested until date string, or None
    :param account: account to check; defaults to the logged-in user, if any
    :return: the until date to use (string or None)
    """
    if not app.config.get("PREMIUM_MODE", True):
        return until_date

    # fall back to the logged-in user when no account was handed in
    if account is None and current_user and not current_user.is_anonymous:
        account = current_user._get_current_object()

    if account is not None and account.has_role(constants.ROLE_PREMIUM_OAI):
        return until_date

    delay_seconds = app.config.get("NON_PREMIUM_DELAY_SECONDS", 2592000)

    # if we are in the phase-in period, cap the delay to the phase in date
    if app.config.get("PREMIUM_PHASE_IN", False):
        phase_in_start = app.config.get("PREMIUM_PHASE_IN_START")
        if phase_in_start is not None:
            since_phase_in = (dates.now() - phase_in_start).total_seconds()
            delay_seconds = min(delay_seconds, since_phase_in)

    cutoff = dates.before_now(delay_seconds)

    if until_date is None:
        return dates.format(cutoff)

    try:
        requested = dates.parse(until_date)
    except ValueError:
        return dates.format(cutoff)

    if requested and dates.is_after(requested, cutoff):
        return dates.format(cutoff)

    return until_date
def list_identifiers_params(req):
    """
    Read the ListIdentifiers arguments from the request.

    The ``until`` value is passed through ``_premium_until_date`` so that the
    premium user requirement is applied before the operation runs.

    :param req: request object exposing ``values.get``
    :return: dict of keyword arguments for ``list_identifiers``
    """
    values = req.values
    requested_until = values.get("until")
    return {
        "from_date": values.get("from"),
        "until_date": _premium_until_date(requested_until),
        "oai_set": values.get("set"),
        "resumption_token": values.get("resumptionToken"),
        "metadata_prefix": values.get("metadataPrefix")
    }
329#####################################################################
330# OAI-PMH protocol operations implemented
331#####################################################################
def get_record(dao, base_url, specified_oai_endpoint, identifier=None, metadata_prefix=None):
    """
    Implement the GetRecord operation.

    :param dao: data access object used to pull the record
    :param base_url: base URL echoed into the response
    :param specified_oai_endpoint: key into the ``OAIPMH_METADATA_FORMATS`` config
    :param identifier: internal identifier of the record (required)
    :param metadata_prefix: requested metadata format (required)
    :return: a ``GetRecord`` response, or ``BadArgument``,
        ``CannotDisseminateFormat`` or ``IdDoesNotExist`` on failure
    """
    # both identifier and prefix are required
    if identifier is None or metadata_prefix is None:
        return BadArgument(base_url)

    # we need at least one configured format for this endpoint
    formats = app.config.get("OAIPMH_METADATA_FORMATS", {}).get(specified_oai_endpoint)
    if not formats:
        return CannotDisseminateFormat(base_url)

    # find the first configured format matching the requested prefix
    chosen = next((f for f in formats if f.get("metadataPrefix") == metadata_prefix), None)
    if chosen is None:
        return CannotDisseminateFormat(base_url)

    record = dao.pull(identifier)
    if record is None:
        return IdDoesNotExist(base_url)

    xwalk = get_crosswalk(chosen.get("metadataPrefix"), dao.__type__)
    header = xwalk.header(record)

    response = GetRecord(base_url, make_oai_identifier(identifier, dao.__type__), metadata_prefix)
    response.header = header

    # metadata is only attached when the record is in DOAJ; otherwise header only
    if record.is_in_doaj():
        response.metadata = xwalk.crosswalk(record)

    return response
def identify(dao, base_url):
    """
    Implement the Identify operation.

    :param dao: data access object providing ``earliest_datestamp()``
    :param base_url: base URL echoed into the response
    :return: a populated ``Identify`` response
    """
    config = app.config
    repo_name = config.get("SERVICE_NAME")
    # a dedicated OAI admin address takes precedence over the general one
    admin_email = config.get("OAI_ADMIN_EMAIL", config.get("ADMIN_EMAIL"))

    response = Identify(base_url, repo_name, admin_email)
    response.earliest_datestamp = dao.earliest_datestamp()
    return response
def list_identifiers(dao, base_url, specified_oai_endpoint, metadata_prefix=None, from_date=None, until_date=None, oai_set=None, resumption_token=None):
    """
    Implement the ListIdentifiers operation.

    Without a resumption token this starts a new listing from the supplied
    arguments.  With a resumption token the listing is resumed from the state
    held in the token.

    :param dao: data access object to query
    :param base_url: base URL echoed into the response
    :param specified_oai_endpoint: key into the ``OAIPMH_METADATA_FORMATS`` config
    :param metadata_prefix: requested metadata format
    :param from_date: lower date bound, or None
    :param until_date: upper date bound, or None
    :param oai_set: encoded set spec, or None
    :param resumption_token: token from a previous incomplete response, or None
    :return: a ``ListIdentifiers`` response or an error response
    """
    if resumption_token is None:
        # do an initial list identifiers
        return _parameterised_list_identifiers(
            dao, base_url,
            specified_oai_endpoint, metadata_prefix=metadata_prefix, from_date=from_date,
            until_date=until_date, oai_set=oai_set
        )

    # Resumption of a previous request: resumptionToken must not be combined
    # with other arguments.  until_date is deliberately left out of this check
    # because list_identifiers_params always passes it through
    # _premium_until_date, which can fill in a value the client never sent;
    # checking it would reject every resumed request from a non-premium user.
    # The token carries its own until date, so the value is ignored here anyway.
    if metadata_prefix is not None or from_date is not None or oai_set is not None:
        return BadArgument(base_url)

    return _resume_list_identifiers(dao, base_url, specified_oai_endpoint, resumption_token=resumption_token)
def _parameterised_list(identifiers_or_records, dao, base_url, specified_oai_endpoint, metadata_prefix=None, from_date=None, until_date=None, oai_set=None, start_number=0, start_after=None):
    """
    Shared implementation behind ListIdentifiers and ListRecords.

    :param identifiers_or_records: ``'identifiers'`` to build a ListIdentifiers
        response (headers only); any other value builds a ListRecords response
    :param dao: data access object providing ``list_records`` and ``__type__``
    :param base_url: base URL echoed into the response
    :param specified_oai_endpoint: key into the ``OAIPMH_METADATA_FORMATS`` config
    :param metadata_prefix: requested metadata format (required)
    :param from_date: lower date bound, or None
    :param until_date: upper date bound, or None
    :param oai_set: encoded set spec, or None
    :param start_number: number of records already served before this page
    :param start_after: ``(last_updated, count)`` marker from the previous page, or None
    :return: a ``ListIdentifiers``/``ListRecords`` response, or ``BadArgument``,
        ``CannotDisseminateFormat`` or ``NoRecordsMatch`` on failure
    """
    # metadata prefix is required
    if metadata_prefix is None:
        return BadArgument(base_url)

    # get the formats and check that we have formats that we can disseminate
    formats = app.config.get("OAIPMH_METADATA_FORMATS", {}).get(specified_oai_endpoint)
    if formats is None or len(formats) == 0:
        return CannotDisseminateFormat(base_url)

    # check that the dates are formatted correctly
    fl = True
    ul = True
    if from_date is not None:
        fl = DateFormat.legitimate_granularity(from_date)
    if until_date is not None:
        ul = DateFormat.legitimate_granularity(until_date)

    if not fl or not ul:
        return BadArgument(base_url)

    # get the result set size
    list_size = app.config.get("OAIPMH_LIST_IDENTIFIERS_PAGE_SIZE", 25)

    # decode the oai_set to something we can query with
    try:
        decoded_set = decode_set_spec(oai_set) if oai_set is not None else None
    except SetSpecException:
        return BadArgument(base_url)

    for f in formats:
        if f.get("metadataPrefix") == metadata_prefix:
            # do the query and set up the response object
            total, results = dao.list_records(from_date, until_date, decoded_set, list_size, start_after)

            # if there are no results, PMH requires us to throw an error
            if len(results) == 0:
                return NoRecordsMatch(base_url)

            # Get the full total
            # Each search with a resumption token is a new search,
            # so the total is not the same as the first search
            # but is reduced by number of records already served.
            # This full_total is the total as in the first search
            full_total = total
            if start_after is not None:
                full_total = total + start_number - start_after[1]

            # Determine where our next starting index will be
            new_start = start_number + len(results)

            # Work out if we need a resumption token. It can have one of 3 values:
            # - None -> do not include the rt in the response if we have a full result set
            # - the empty string -> include in the response if this is the last set of results from an incomplete list
            # - some value -> include in the response if there are more values to retrieve
            if len(results) == full_total:
                resumption_token = None
            elif new_start == full_total:
                resumption_token = ''
            else:
                new_start_after = get_start_after(results, start_after, list_size)
                resumption_token = make_resumption_token(metadata_prefix=metadata_prefix, from_date=from_date,
                                                         until_date=until_date, oai_set=oai_set, start_number=new_start,
                                                         start_after=new_start_after)

            # Get our list of results for this request
            if identifiers_or_records == 'identifiers':
                lst = ListIdentifiers(base_url, from_date=from_date, until_date=until_date, oai_set=oai_set,
                                      metadata_prefix=metadata_prefix)
            else:  # ListRecords
                lst = ListRecords(base_url, from_date=from_date, until_date=until_date, oai_set=oai_set,
                                  metadata_prefix=metadata_prefix)

            # the empty string still counts here, so the final page of an
            # incomplete list carries an empty resumptionToken element
            if resumption_token is not None:
                expiry = app.config.get("OAIPMH_RESUMPTION_TOKEN_EXPIRY", -1)
                lst.set_resumption(resumption_token, complete_list_size=full_total, cursor=new_start, expiry=expiry)

            for r in results:
                # do the crosswalk
                xwalk = get_crosswalk(f.get("metadataPrefix"), dao.__type__)
                header = xwalk.header(r)

                if identifiers_or_records == 'identifiers':
                    # add to the response (header only)
                    lst.add_record(header)
                else:  # ListRecords
                    metadata = xwalk.crosswalk(r)
                    # add to the response (metadata and header)
                    lst.add_record(metadata, header)
            return lst

    # if we have not returned already, this means we can't disseminate this format
    return CannotDisseminateFormat(base_url)
def _parameterised_list_identifiers(dao, base_url, specified_oai_endpoint, metadata_prefix=None, from_date=None, until_date=None, oai_set=None, start_number=0, start_after=None):
    """Run ``_parameterised_list`` in ``'identifiers'`` mode with the given arguments."""
    return _parameterised_list(
        'identifiers', dao, base_url, specified_oai_endpoint,
        metadata_prefix=metadata_prefix,
        from_date=from_date,
        until_date=until_date,
        oai_set=oai_set,
        start_number=start_number,
        start_after=start_after,
    )
def _resume_list_identifiers(dao, base_url, specified_oai_endpoint, resumption_token=None):
    """
    Continue a ListIdentifiers listing from a resumption token.

    :return: the next page, or ``BadResumptionToken`` if the token cannot be decoded
    """
    try:
        params = decode_resumption_token(resumption_token)
    except ResumptionTokenException:
        return BadResumptionToken(base_url)
    else:
        return _parameterised_list_identifiers(dao, base_url, specified_oai_endpoint, **params)
def list_metadata_formats(dao, base_url, specified_oai_endpoint, identifier=None):
    """
    Implement the ListMetadataFormats operation.

    :param dao: data access object providing ``identifier_exists`` and ``__type__``
    :param base_url: base URL echoed into the response
    :param specified_oai_endpoint: key into the ``OAIPMH_METADATA_FORMATS`` config
    :param identifier: optional internal identifier; must exist if given
    :return: a ``ListMetadataFormats`` response, or ``IdDoesNotExist`` /
        ``NoMetadataFormats`` on failure
    """
    has_identifier = identifier is not None

    # an identifier, when supplied, has to be a known one
    if has_identifier and not dao.identifier_exists(identifier):
        return IdDoesNotExist(base_url)

    # there should always be configured formats, but a mis-configured service
    # gets the correct protocol error rather than an empty list
    formats = app.config.get("OAIPMH_METADATA_FORMATS", {}).get(specified_oai_endpoint)
    if not formats:
        return NoMetadataFormats(base_url)

    oai_id = make_oai_identifier(identifier, dao.__type__) if has_identifier else None
    response = ListMetadataFormats(base_url=base_url, identifier=oai_id)
    for fmt in formats:
        response.add_format(fmt.get("metadataPrefix"), fmt.get("schema"), fmt.get("metadataNamespace"))
    return response
def list_records(dao, base_url, specified_oai_endpoint, metadata_prefix=None, from_date=None, until_date=None, oai_set=None, resumption_token=None):
    """
    Implement the ListRecords operation.

    Without a resumption token this starts a new listing from the supplied
    arguments; with one, the listing is resumed from the token and every other
    argument is ignored.

    :return: a ``ListRecords`` response or an error response
    """
    if resumption_token is not None:
        # Technically speaking, having other parameters alongside a resumption
        # token should result in a BadArgument. That check interferes with the
        # premium metadata mechanism, though, so it is not applied for now.
        # The resumption token overrides all other parameters anyway.
        return _resume_list_records(dao, base_url, specified_oai_endpoint, resumption_token=resumption_token)

    # no token: do an initial list records
    return _parameterised_list_records(
        dao, base_url, specified_oai_endpoint,
        metadata_prefix=metadata_prefix,
        from_date=from_date,
        until_date=until_date,
        oai_set=oai_set,
    )
def _parameterised_list_records(dao, base_url, specified_oai_endpoint, metadata_prefix=None, from_date=None, until_date=None, oai_set=None, start_number=0, start_after=None):
    """Run ``_parameterised_list`` in ``'records'`` mode with the given arguments."""
    return _parameterised_list(
        'records', dao, base_url, specified_oai_endpoint,
        metadata_prefix=metadata_prefix,
        from_date=from_date,
        until_date=until_date,
        oai_set=oai_set,
        start_number=start_number,
        start_after=start_after,
    )
def _resume_list_records(dao, base_url, specified_oai_endpoint, resumption_token=None):
    """
    Continue a ListRecords listing from a resumption token.

    :return: the next page, or ``BadResumptionToken`` if the token cannot be decoded
    """
    try:
        params = decode_resumption_token(resumption_token)
    except ResumptionTokenException:
        return BadResumptionToken(base_url)
    else:
        return _parameterised_list_records(dao, base_url, specified_oai_endpoint, **params)
def list_sets(dao, base_url, resumption_token=None):
    """
    Implement the ListSets operation.

    Resumption tokens are not supported for this operation, so supplying one
    always yields ``BadResumptionToken``.

    :param dao: data access object providing ``list_sets()``
    :param base_url: base URL echoed into the response
    :param resumption_token: must be None
    :return: a ``ListSets`` response or ``BadResumptionToken``
    """
    if resumption_token is not None:
        return BadResumptionToken(base_url)

    response = ListSets(base_url)
    # the set name doubles as the source of the set spec
    for set_name in dao.list_sets():
        response.add_set(make_set_spec(set_name), set_name)
    return response
564#####################################################################
565# Objects
566#####################################################################
class OAI_PMH(object):
    """
    Base class for every OAI-PMH response.

    It builds the common ``OAI-PMH`` envelope (responseDate and request
    elements); subclasses supply the verb-specific element via
    ``get_element`` and any request attributes via ``add_request_attributes``.
    """
    VERSION = "2.0"

    PMH_NAMESPACE = "http://www.openarchives.org/OAI/2.0/"
    PMH = "{" + PMH_NAMESPACE + "}"

    XSI_NAMESPACE = "http://www.w3.org/2001/XMLSchema-instance"
    XSI = "{" + XSI_NAMESPACE + "}"

    NSMAP = {None : PMH_NAMESPACE, "xsi" : XSI_NAMESPACE}

    def __init__(self, base_url):
        self.base_url = base_url
        # set by subclasses that correspond to a protocol verb
        self.verb = None

    def _to_xml(self):
        """Build the full response tree: envelope plus the subclass's element."""
        root = etree.Element(self.PMH + "OAI-PMH", nsmap=self.NSMAP)
        root.set(
            self.XSI + "schemaLocation",
            "http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd"
        )

        response_date_el = etree.SubElement(root, self.PMH + "responseDate")
        response_date_el.text = get_response_date()

        request_el = etree.SubElement(root, self.PMH + "request")
        if self.verb is not None:
            request_el.set("verb", self.verb)
        request_el.text = self.base_url
        self.add_request_attributes(request_el)

        root.append(self.get_element())
        return root

    def serialise(self):
        """Return the response as UTF-8 encoded XML with an XML declaration."""
        return etree.tostring(self._to_xml(), xml_declaration=True, encoding="UTF-8")

    def get_element(self):
        """Return the verb-specific element; must be provided by subclasses."""
        raise NotImplementedError()

    def add_request_attributes(self, element):
        """Hook for subclasses to add attributes to the request element; no-op here."""
        return
class GetRecord(OAI_PMH):
    """Response to the GetRecord verb: one record with its header and optional metadata."""

    def __init__(self, base_url, identifier, metadata_prefix):
        super().__init__(base_url)
        self.verb = "GetRecord"
        self.identifier = identifier
        self.metadata_prefix = metadata_prefix
        # both are assigned by the caller after construction
        self.metadata = None
        self.header = None

    def get_element(self):
        """Build the ``GetRecord`` element containing a single ``record``."""
        container = etree.Element(self.PMH + "GetRecord", nsmap=self.NSMAP)
        record_el = etree.SubElement(container, self.PMH + "record")

        record_el.append(self.header)
        if self.metadata is not None:
            record_el.append(self.metadata)

        return container

    def add_request_attributes(self, element):
        """Echo the identifier and metadataPrefix arguments onto the request element."""
        echoed = (
            ("identifier", self.identifier),
            ("metadataPrefix", self.metadata_prefix),
        )
        for attr, value in echoed:
            if value is not None:
                element.set(attr, value)
class Identify(OAI_PMH):
    """Response to the Identify verb, describing the repository."""

    def __init__(self, base_url, repo_name, admin_email):
        super(Identify, self).__init__(base_url)
        self.verb = "Identify"
        self.repo_name = repo_name
        self.admin_email = admin_email
        # assigned by the caller; the default earliest datestamp is used if left as None
        self.earliest_datestamp = None

    def get_element(self):
        """Build the ``Identify`` element and its child elements."""
        identify = etree.Element(self.PMH + "Identify", nsmap=self.NSMAP)

        repo_name = etree.SubElement(identify, self.PMH + "repositoryName")
        repo_name.text = self.repo_name

        base = etree.SubElement(identify, self.PMH + "baseURL")
        base.text = self.base_url

        protocol = etree.SubElement(identify, self.PMH + "protocolVersion")
        protocol.text = self.VERSION

        admin_email = etree.SubElement(identify, self.PMH + "adminEmail")
        admin_email.text = self.admin_email

        earliest = etree.SubElement(identify, self.PMH + "earliestDatestamp")
        if self.earliest_datestamp is not None:
            earliest.text = self.earliest_datestamp
        else:
            # fall back to the default rather than emitting an empty element
            earliest.text = DateFormat.default_earliest()

        deletes = etree.SubElement(identify, self.PMH + "deletedRecord")
        deletes.text = "transient"  # keep the door open

        granularity = etree.SubElement(identify, self.PMH + "granularity")
        granularity.text = DateFormat.granularity()

        return identify
class ListIdentifiers(OAI_PMH):
    """Response to the ListIdentifiers verb: record headers plus an optional resumption token."""

    def __init__(self, base_url, from_date=None, until_date=None, oai_set=None, metadata_prefix=None):
        super().__init__(base_url)
        self.verb = "ListIdentifiers"
        self.from_date = from_date
        self.until_date = until_date
        self.oai_set = oai_set
        self.metadata_prefix = metadata_prefix
        self.records = []
        self.resumption = None

    def set_resumption(self, resumption_token, complete_list_size=None, cursor=None, expiry=-1):
        """
        Attach resumption details to the response.

        :param resumption_token: token text (may be the empty string)
        :param complete_list_size: total size of the full list, if known
        :param cursor: cursor value to report, if known
        :param expiry: token lifetime in seconds; a negative value means no expiration date is emitted
        """
        details = {"resumption_token" : resumption_token, "expiry" : expiry}
        if complete_list_size is not None:
            details["complete_list_size"] = complete_list_size
        if cursor is not None:
            details["cursor"] = cursor
        self.resumption = details

    def add_record(self, header):
        """Append a record header to the response."""
        self.records.append(header)

    def add_request_attributes(self, element):
        """Echo the supplied list arguments onto the request element."""
        echoed = (
            ("from", self.from_date),
            ("until", self.until_date),
            ("set", self.oai_set),
            ("metadataPrefix", self.metadata_prefix),
        )
        for attr, value in echoed:
            if value is not None:
                element.set(attr, value)

    def get_element(self):
        """Build the ``ListIdentifiers`` element with its headers and resumption token."""
        container = etree.Element(self.PMH + "ListIdentifiers", nsmap=self.NSMAP)

        for header in self.records:
            container.append(header)

        details = self.resumption
        if details is None:
            return container

        rt = etree.SubElement(container, self.PMH + "resumptionToken")
        for key, attr in (("complete_list_size", "completeListSize"), ("cursor", "cursor")):
            if key in details:
                rt.set(attr, str(details.get(key)))

        expiry = details.get("expiry", -1)
        if expiry >= 0:
            expires_at = dates.now() + timedelta(0, expiry)
            rt.set("expirationDate", DateFormat.format(expires_at))

        rt.text = details.get("resumption_token")
        return container
class ListMetadataFormats(OAI_PMH):
    """Response to the ListMetadataFormats verb."""

    def __init__(self, base_url, identifier=None):
        super().__init__(base_url)
        self.verb = "ListMetadataFormats"
        self.identifier = identifier
        self.formats = []

    def add_format(self, metadata_prefix, schema, metadata_namespace):
        """Register one metadata format to be listed in the response."""
        entry = {
            "metadataPrefix": metadata_prefix,
            "schema": schema,
            "metadataNamespace": metadata_namespace
        }
        self.formats.append(entry)

    def add_request_attributes(self, element):
        """Echo the identifier argument, if any, onto the request element."""
        if self.identifier is None:
            return
        element.set("identifier", self.identifier)

    def get_element(self):
        """Build the ``ListMetadataFormats`` element, one ``metadataFormat`` per format."""
        container = etree.Element(self.PMH + "ListMetadataFormats", nsmap=self.NSMAP)

        for fmt in self.formats:
            format_el = etree.SubElement(container, self.PMH + "metadataFormat")
            # child element names match the keys stored by add_format
            for field in ("metadataPrefix", "schema", "metadataNamespace"):
                child = etree.SubElement(format_el, self.PMH + field)
                child.text = fmt.get(field)

        return container
class ListRecords(OAI_PMH):
    """Response to the ListRecords verb: full records plus an optional resumption token."""

    def __init__(self, base_url, from_date=None, until_date=None, oai_set=None, metadata_prefix=None):
        super().__init__(base_url)
        self.verb = "ListRecords"
        self.from_date = from_date
        self.until_date = until_date
        self.oai_set = oai_set
        self.metadata_prefix = metadata_prefix
        self.records = []
        self.resumption = None
        self.resumption_expiry = -1

    def set_resumption(self, resumption_token, complete_list_size=None, cursor=None, expiry=-1):
        """
        Attach resumption details to the response.

        :param resumption_token: token text (may be the empty string)
        :param complete_list_size: total size of the full list, if known
        :param cursor: cursor value to report, if known
        :param expiry: token lifetime in seconds; a negative value means no expiration date is emitted
        """
        details = {"resumption_token" : resumption_token, "expiry" : expiry}
        if complete_list_size is not None:
            details["complete_list_size"] = complete_list_size
        if cursor is not None:
            details["cursor"] = cursor
        self.resumption = details

    def add_record(self, metadata, header):
        """Append a (metadata, header) pair to the response."""
        self.records.append((metadata, header))

    def add_request_attributes(self, element):
        """Echo the supplied list arguments onto the request element."""
        echoed = (
            ("from", self.from_date),
            ("until", self.until_date),
            ("set", self.oai_set),
            ("metadataPrefix", self.metadata_prefix),
        )
        for attr, value in echoed:
            if value is not None:
                element.set(attr, value)

    def get_element(self):
        """Build the ``ListRecords`` element with its records and resumption token."""
        container = etree.Element(self.PMH + "ListRecords", nsmap=self.NSMAP)

        for metadata, header in self.records:
            record_el = etree.SubElement(container, self.PMH + "record")
            record_el.append(header)
            if metadata is not None:
                record_el.append(metadata)

        details = self.resumption
        if details is None:
            return container

        rt = etree.SubElement(container, self.PMH + "resumptionToken")
        for key, attr in (("complete_list_size", "completeListSize"), ("cursor", "cursor")):
            if key in details:
                rt.set(attr, str(details.get(key)))

        expiry = details.get("expiry", -1)
        if expiry >= 0:
            expires_at = dates.now() + timedelta(0, expiry)
            rt.set("expirationDate", DateFormat.format(expires_at))

        rt.text = details.get("resumption_token")
        return container
class ListSets(OAI_PMH):
    """Response to the ListSets verb."""

    def __init__(self, base_url):
        super().__init__(base_url)
        self.verb = "ListSets"
        self.sets = []

    def add_set(self, spec, name):
        """Register a set by its spec and display name."""
        self.sets.append((spec, name))

    def get_element(self):
        """Build the ``ListSets`` element, one ``set`` child per registered set."""
        container = etree.Element(self.PMH + "ListSets", nsmap=self.NSMAP)

        for spec, name in self.sets:
            set_el = etree.SubElement(container, self.PMH + "set")
            spec_el = etree.SubElement(set_el, self.PMH + "setSpec")
            spec_el.text = spec
            name_el = etree.SubElement(set_el, self.PMH + "setName")
            name_el.text = name

        return container
847#####################################################################
848# Error Handling
849#####################################################################
class OAIPMHError(OAI_PMH):
    """
    Base class for OAI-PMH error responses.

    Subclasses set ``code`` and ``description``; the response body is a
    single ``error`` element carrying them.
    """

    def __init__(self, base_url):
        super().__init__(base_url)
        self.code = None
        self.description = None

    def get_element(self):
        """Build the ``error`` element from the code and description."""
        error_el = etree.Element(self.PMH + "error", nsmap=self.NSMAP)

        code, description = self.code, self.description
        if code is not None:
            error_el.set("code", code)
        if description is not None:
            error_el.text = description

        return error_el
class BadArgument(OAIPMHError):
    """Error response carrying the ``badArgument`` code."""

    def __init__(self, base_url):
        super().__init__(base_url)
        self.code = "badArgument"
        self.description = "The request includes illegal arguments, is missing required arguments, includes a repeated argument, or values for arguments have an illegal syntax."
class BadResumptionToken(OAIPMHError):
    """Error response carrying the ``badResumptionToken`` code."""

    def __init__(self, base_url):
        super().__init__(base_url)
        self.code = "badResumptionToken"
        self.description = "The value of the resumptionToken argument is invalid or expired."
class BadVerb(OAIPMHError):
    """Error response carrying the ``badVerb`` code."""

    def __init__(self, base_url):
        super().__init__(base_url)
        self.code = "badVerb"
        self.description = "Value of the verb argument is not a legal OAI-PMH verb, the verb argument is missing, or the verb argument is repeated."
class CannotDisseminateFormat(OAIPMHError):
    """Error response carrying the ``cannotDisseminateFormat`` code."""

    def __init__(self, base_url):
        super().__init__(base_url)
        self.code = "cannotDisseminateFormat"
        self.description = "The metadata format identified by the value given for the metadataPrefix argument is not supported by the item or by the repository."
class IdDoesNotExist(OAIPMHError):
    """Error response carrying the ``idDoesNotExist`` code."""

    def __init__(self, base_url):
        super().__init__(base_url)
        self.code = "idDoesNotExist"
        self.description = "The value of the identifier argument is unknown or illegal in this repository."
class NoRecordsMatch(OAIPMHError):
    """Error response carrying the ``noRecordsMatch`` code."""

    def __init__(self, base_url):
        super().__init__(base_url)
        self.code = "noRecordsMatch"
        self.description = "The combination of the values of the from, until, set and metadataPrefix arguments results in an empty list."
class NoMetadataFormats(OAIPMHError):
    """Error response carrying the ``noMetadataFormats`` code."""

    def __init__(self, base_url):
        super().__init__(base_url)
        self.code = "noMetadataFormats"
        self.description = "There are no metadata formats available for the specified item."
class NoSetHierarchy(OAIPMHError):
    """Error response carrying the ``noSetHierarchy`` code."""

    def __init__(self, base_url):
        super().__init__(base_url)
        self.code = "noSetHierarchy"
        self.description = "The repository does not support sets."