Coverage for portality/view/oaipmh.py: 90%
543 statements
« prev ^ index » next coverage.py v6.4.2, created at 2022-07-19 18:38 +0100
« prev ^ index » next coverage.py v6.4.2, created at 2022-07-19 18:38 +0100
import base64
import json
from datetime import datetime, timedelta, timezone

from flask import Blueprint, request, make_response
from lxml import etree

from portality.core import app
from portality.models import OAIPMHJournal, OAIPMHArticle
from portality.lib import plausible
from portality.crosswalks.oaipmh import CROSSWALKS, make_set_spec, make_oai_identifier
# Flask blueprint on which the OAI-PMH routes below are registered
blueprint = Blueprint("oaipmh", __name__)
#####################################################################
# Web API endpoints
#####################################################################
@blueprint.route("/oai", methods=["GET", "POST"])
@blueprint.route("/oai.<specified>", methods=["GET", "POST"])
def oaipmh(specified=None):
    """Handle an OAI-PMH request and return the XML response.

    With no ``specified`` suffix the journal DAO is used; with any suffix the
    (lower-cased) suffix selects the metadata format configuration and the
    article DAO is used. An analytics event is sent for every request before
    the verb is dispatched.

    :param specified: optional endpoint suffix from the ``/oai.<specified>`` route
    :return: a Flask response with mimetype ``text/xml``
    """
    # choose the data source, and the analytics label that goes with it
    if specified is None:
        dao = OAIPMHJournal()
        label = 'Journal'
    else:
        specified = specified.lower()
        dao = OAIPMHArticle()
        label = 'Article'
    event_payload = {'label': label}

    # record the requested identifier against the event, when one was supplied
    ident = request.values.get('identifier')
    if ident is not None:
        event_payload[app.config.get('GA_DIMENSIONS')['oai_res_id']] = ident

    # the verb doubles as the analytics action
    verb = request.values.get("verb")
    event_payload['action'] = verb

    # NOTE(review): the category is read from the OpenURL config key although
    # this is the OAI-PMH endpoint - confirm that this is intentional.
    plausible.send_event(app.config.get('GA_CATEGORY_OPENURL', 'OpenURL'),
                         **event_payload)

    # dispatch on the case-insensitive verb; a missing verb falls through to BadVerb
    base_url = request.base_url
    verb_lc = None if verb is None else verb.lower()

    if verb_lc == "identify":
        result = identify(dao, base_url)
    elif verb_lc == "listmetadataformats":
        result = list_metadata_formats(dao, base_url, specified, **list_metadata_formats_params(request))
    elif verb_lc == "getrecord":
        result = get_record(dao, base_url, specified, **get_record_params(request))
    elif verb_lc == "listsets":
        result = list_sets(dao, base_url, **list_sets_params(request))
    elif verb_lc == "listrecords":
        result = list_records(dao, base_url, specified, **list_records_params(request))
    elif verb_lc == "listidentifiers":
        result = list_identifiers(dao, base_url, specified, **list_identifiers_params(request))
    else:
        # no verb at all, or one we do not recognise
        result = BadVerb(base_url)

    # serialise and return
    resp = make_response(result.serialise())
    resp.mimetype = "text/xml"
    return resp
#####################################################################
# Utility methods/objects
#####################################################################
class DateFormat(object):
    """Date formatting rules used in the OAI-PMH responses.

    All timestamps are rendered as ``YYYY-MM-DDThh:mm:ssZ``; the trailing
    ``Z`` denotes UTC.
    """

    # strptime/strftime pattern matching the advertised granularity
    _TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
    # date-only pattern which is also accepted on input
    _DATE_FORMAT = "%Y-%m-%d"

    @classmethod
    def granularity(cls):
        """Return the granularity string advertised in the Identify response."""
        return "YYYY-MM-DDThh:mm:ssZ"

    @classmethod
    def default_earliest(cls):
        """Return the fallback earliest datestamp (start of the unix epoch)."""
        return "1970-01-01T00:00:00Z"

    @classmethod
    def now(cls):
        """Return the current time, in UTC, as a formatted timestamp.

        The output carries a ``Z`` suffix, so the time must be taken in UTC
        rather than in the server's local timezone.
        """
        return datetime.now(timezone.utc).strftime(cls._TIMESTAMP_FORMAT)

    @classmethod
    def format(cls, date):
        """Format ``date`` (a datetime) as ``YYYY-MM-DDThh:mm:ssZ``.

        No timezone conversion is applied; the caller supplies a UTC value.
        """
        return date.strftime(cls._TIMESTAMP_FORMAT)

    @classmethod
    def legitimate_granularity(cls, datestr):
        """Return True if ``datestr`` parses as a date or as a full timestamp.

        Values that are not strings (e.g. ``None``) are reported as not
        legitimate rather than raising.
        """
        for fmt in (cls._DATE_FORMAT, cls._TIMESTAMP_FORMAT):
            try:
                datetime.strptime(datestr, fmt)
            except (ValueError, TypeError):
                # wrong shape for this format (or not a string): try the next
                continue
            return True
        return False
def decode_set_spec(setspec):
    """Decode an OAI setSpec back into the set name it stands for.

    The setSpec is URL-safe base64 in which the ``=`` padding character has
    been swapped for ``~``.

    :param setspec: the encoded setSpec string
    :return: the decoded set name as a str
    :raises SetSpecException: if the value is not valid base64 or does not
        decode to UTF-8 text
    """
    # switch the ~ back to the = padding character
    padded = setspec.replace("~", "=")

    try:
        return base64.urlsafe_b64decode(padded).decode("utf-8")
    except (ValueError, TypeError):
        # binascii.Error and UnicodeDecodeError are both ValueError subclasses;
        # catching only these lets KeyboardInterrupt/SystemExit propagate.
        raise SetSpecException()
def get_start_after(docs, current_start_after, list_size):
    """Work out the ``start_after`` marker for the next page of results.

    :param docs: the records of the current page; must not be empty
    :param current_start_after: the ``(date, count)`` marker this page was
        fetched with, or None for a first page
    :param list_size: the page size
    :return: ``(last_date, count)`` where ``last_date`` is the ``last_updated``
        value of the final record and ``count`` the number of records on this
        page sharing it
    """
    last_date = docs[-1].get("last_updated")
    count = sum(1 for doc in docs if doc.get("last_updated") == last_date)

    if count == list_size and current_start_after is not None:
        if last_date == current_start_after[0]:
            # The whole page shares its date with the last record of the
            # previous page, so carry the previous count forward as well.
            count += current_start_after[1]

    return (last_date, count)
def make_resumption_token(metadata_prefix=None, from_date=None, until_date=None, oai_set=None, start_number=None, start_after=None):
    """Pack the paging state into a resumption token.

    Each argument that is not None is stored under a one-letter key; the
    resulting dict is JSON-serialised and URL-safe base64 encoded.

    :return: the token as bytes
    """
    keyed_values = (
        ("m", metadata_prefix),
        ("f", from_date),
        ("u", until_date),
        ("s", oai_set),
        ("n", start_number),
        ("a", start_after),
    )
    state = {key: value for key, value in keyed_values if value is not None}
    serialised = json.dumps(state)
    return base64.urlsafe_b64encode(serialised.encode('utf-8'))
class ResumptionTokenException(Exception):
    """Raised when a resumption token cannot be decoded."""
class SetSpecException(Exception):
    """Raised when a setSpec cannot be decoded."""
def decode_resumption_token(resumption_token):
    """Unpack a resumption token into keyword arguments for a list request.

    The token is expected to be URL-safe base64 wrapping a JSON object with
    one-letter keys (the inverse of ``make_resumption_token``).

    :param resumption_token: the token as received from the client
    :return: dict with any of ``metadata_prefix``, ``from_date``,
        ``until_date``, ``oai_set``, ``start_number`` and ``start_after``
        (the latter converted to a tuple)
    :raises ResumptionTokenException: if the token is not valid base64, is not
        UTF-8 encoded JSON, is not a JSON object, or carries a ``start_after``
        value that is not a sequence
    """
    try:
        raw = base64.urlsafe_b64decode(str(resumption_token))
        decoded = json.loads(raw.decode("utf-8"))
    except (TypeError, ValueError) as exc:
        # On Python 3 malformed base64 raises binascii.Error and bad
        # UTF-8/JSON raise UnicodeDecodeError/JSONDecodeError - all of them
        # ValueError subclasses.
        raise ResumptionTokenException() from exc

    # well-formed JSON which is not an object is still not one of our tokens
    if not isinstance(decoded, dict):
        raise ResumptionTokenException()

    key_to_param = (
        ("m", "metadata_prefix"),
        ("f", "from_date"),
        ("u", "until_date"),
        ("s", "oai_set"),
        ("n", "start_number"),
        ("a", "start_after"),
    )
    params = {name: decoded[key] for key, name in key_to_param if key in decoded}

    if "start_after" in params:
        try:
            # JSON has no tuples, so the marker arrives as a list
            params["start_after"] = tuple(params["start_after"])
        except TypeError as exc:
            raise ResumptionTokenException() from exc
    return params
def extract_internal_id(oai_identifier):
    """Return the part of an OAI identifier after its final colon.

    An identifier without any colon is returned unchanged.
    """
    # everything before the last ":" is decoration around the internal id
    _, _, internal_id = oai_identifier.rpartition(":")
    return internal_id
def get_response_date():
    """Return the timestamp to place in the ``responseDate`` element."""
    response_date = DateFormat.now()
    return response_date
def get_crosswalk(prefix, datatype):
    """Instantiate the crosswalk registered for a metadata prefix and data type.

    :param prefix: the metadataPrefix to look up in ``CROSSWALKS``
    :param datatype: the record type to look up under that prefix
    :return: a new instance of the registered crosswalk class
    """
    crosswalks_for_prefix = CROSSWALKS.get(prefix, {})
    crosswalk_class = crosswalks_for_prefix.get(datatype)
    return crosswalk_class()
def list_metadata_formats_params(req):
    """Read the ListMetadataFormats arguments from the request.

    :param req: request object exposing a ``values`` mapping
    :return: dict with ``identifier`` reduced to its internal id, or None if
        it was not supplied
    """
    raw_identifier = req.values.get("identifier")
    if raw_identifier is None:
        return {"identifier" : None}
    return {"identifier" : extract_internal_id(raw_identifier)}
def get_record_params(req):
    """Read the GetRecord arguments from the request.

    :param req: request object exposing a ``values`` mapping
    :return: dict with ``identifier`` (reduced to its internal id when
        supplied) and ``metadata_prefix``
    """
    values = req.values
    params = {
        "identifier": values.get("identifier"),
        "metadata_prefix": values.get("metadataPrefix"),
    }
    if params["identifier"] is not None:
        params["identifier"] = extract_internal_id(params["identifier"])
    return params
def list_sets_params(req):
    """Read the ListSets arguments from the request.

    :param req: request object exposing a ``values`` mapping
    :return: dict holding the ``resumption_token`` (None when absent)
    """
    return {"resumption_token" : req.values.get("resumptionToken")}
def list_records_params(req):
    """Read the ListRecords arguments from the request.

    :param req: request object exposing a ``values`` mapping
    :return: dict of keyword arguments for ``list_records``; arguments that
        were not supplied are None
    """
    # keyword name -> OAI-PMH argument name
    argument_names = (
        ("from_date", "from"),
        ("until_date", "until"),
        ("oai_set", "set"),
        ("resumption_token", "resumptionToken"),
        ("metadata_prefix", "metadataPrefix"),
    )
    return {keyword: req.values.get(argument) for keyword, argument in argument_names}
def list_identifiers_params(req):
    """Read the ListIdentifiers arguments from the request.

    :param req: request object exposing a ``values`` mapping
    :return: dict of keyword arguments for ``list_identifiers``; arguments
        that were not supplied are None
    """
    lookup = req.values.get
    return dict(
        from_date=lookup("from"),
        until_date=lookup("until"),
        oai_set=lookup("set"),
        resumption_token=lookup("resumptionToken"),
        metadata_prefix=lookup("metadataPrefix"),
    )
#####################################################################
# OAI-PMH protocol operations implemented
#####################################################################
def get_record(dao, base_url, specified_oai_endpoint, identifier=None, metadata_prefix=None):
    """Implement the GetRecord verb.

    :param dao: data access object used to pull the record
    :param base_url: the request's base URL, echoed in the response
    :param specified_oai_endpoint: key into ``OAIPMH_METADATA_FORMATS``
    :param identifier: internal id of the record (required)
    :param metadata_prefix: requested metadata format (required)
    :return: a ``GetRecord`` response, or one of ``BadArgument``,
        ``CannotDisseminateFormat`` or ``IdDoesNotExist``
    """
    # identifier and metadataPrefix are both mandatory
    if identifier is None or metadata_prefix is None:
        return BadArgument(base_url)

    # without any configured formats nothing can be disseminated
    formats = app.config.get("OAIPMH_METADATA_FORMATS", {}).get(specified_oai_endpoint)
    if formats is None or len(formats) == 0:
        return CannotDisseminateFormat(base_url)

    for fmt in formats:
        if fmt.get("metadataPrefix") != metadata_prefix:
            continue

        # this is the requested format: fetch the record from the dao
        record = dao.pull(identifier)
        if record is None:
            return IdDoesNotExist(base_url)

        # crosswalk the record into its metadata and header elements
        xwalk = get_crosswalk(fmt.get("metadataPrefix"), dao.__type__)
        metadata = xwalk.crosswalk(record)
        header = xwalk.header(record)

        # assemble the response
        response = GetRecord(base_url, make_oai_identifier(identifier, dao.__type__), metadata_prefix)
        response.metadata = metadata
        response.header = header
        return response

    # none of the configured formats matched the requested prefix
    return CannotDisseminateFormat(base_url)
def identify(dao, base_url):
    """Implement the Identify verb.

    The repository name and admin email come from the ``SERVICE_NAME`` and
    ``ADMIN_EMAIL`` config values; the earliest datestamp comes from the dao.
    """
    response = Identify(
        base_url,
        app.config.get("SERVICE_NAME"),
        app.config.get("ADMIN_EMAIL"),
    )
    response.earliest_datestamp = dao.earliest_datestamp()
    return response
def list_identifiers(dao, base_url, specified_oai_endpoint, metadata_prefix=None, from_date=None, until_date=None, oai_set=None, resumption_token=None):
    """Implement the ListIdentifiers verb.

    A request is either a fresh one (described by its arguments) or the
    resumption of an earlier one (described by a resumption token alone);
    combining the token with any other argument yields ``BadArgument``.
    """
    if resumption_token is not None:
        # the token must be the only argument
        other_arguments = (metadata_prefix, from_date, until_date, oai_set)
        if any(argument is not None for argument in other_arguments):
            return BadArgument(base_url)
        return _resume_list_identifiers(dao, base_url, specified_oai_endpoint, resumption_token=resumption_token)

    # no token: this is an initial request
    return _parameterised_list_identifiers(
        dao, base_url, specified_oai_endpoint,
        metadata_prefix=metadata_prefix,
        from_date=from_date,
        until_date=until_date,
        oai_set=oai_set,
    )
def _parameterised_list(identifiers_or_records, dao, base_url, specified_oai_endpoint, metadata_prefix=None, from_date=None, until_date=None, oai_set=None, start_number=0, start_after=None):
    """Shared implementation behind ListIdentifiers and ListRecords.

    :param identifiers_or_records: ``'identifiers'`` to build a
        ``ListIdentifiers`` response (headers only); any other value builds a
        ``ListRecords`` response (metadata and header)
    :param dao: data access object providing ``list_records``
    :param base_url: the request's base URL, echoed in the response
    :param specified_oai_endpoint: key into ``OAIPMH_METADATA_FORMATS``
    :param metadata_prefix: requested metadata format (required)
    :param from_date: optional lower date bound
    :param until_date: optional upper date bound
    :param oai_set: optional encoded setSpec
    :param start_number: number of records served by previous pages
    :param start_after: ``(date, count)`` marker from the previous page, if any
    :return: the list response, or one of ``BadArgument``,
        ``CannotDisseminateFormat`` or ``NoRecordsMatch``
    """
    # metadataPrefix is mandatory
    if metadata_prefix is None:
        return BadArgument(base_url)

    # without any configured formats nothing can be disseminated
    formats = app.config.get("OAIPMH_METADATA_FORMATS", {}).get(specified_oai_endpoint)
    if formats is None or len(formats) == 0:
        return CannotDisseminateFormat(base_url)

    # both date bounds, where given, must use a supported granularity
    from_ok = True if from_date is None else DateFormat.legitimate_granularity(from_date)
    until_ok = True if until_date is None else DateFormat.legitimate_granularity(until_date)
    if not (from_ok and until_ok):
        return BadArgument(base_url)

    # the page size
    page_size = app.config.get("OAIPMH_LIST_IDENTIFIERS_PAGE_SIZE", 25)

    # turn the setSpec into something the dao can query with
    decoded_set = None
    if oai_set is not None:
        try:
            decoded_set = decode_set_spec(oai_set)
        except SetSpecException:
            return BadArgument(base_url)

    headers_only = identifiers_or_records == 'identifiers'

    for fmt in formats:
        if fmt.get("metadataPrefix") != metadata_prefix:
            continue

        # this is the requested format: run the query
        total, results = dao.list_records(from_date, until_date, decoded_set, page_size, start_after)

        # PMH requires an error when nothing matches
        if len(results) == 0:
            return NoRecordsMatch(base_url)

        # Every resumed request is a fresh search, so ``total`` shrinks by the
        # number of records already served. ``full_total`` reconstructs the
        # size of the complete list as the first search saw it.
        if start_after is None:
            full_total = total
        else:
            full_total = total + start_number - start_after[1]

        # index at which the next page would start
        next_start = start_number + len(results)

        # The resumption token takes one of three values:
        # - None: everything fitted into this response, so no token element
        # - '': this is the final page of a paged list, so an empty token
        # - a real token: more pages follow
        if len(results) == full_total:
            token = None
        elif next_start == full_total:
            token = ''
        else:
            next_start_after = get_start_after(results, start_after, page_size)
            token = make_resumption_token(metadata_prefix=metadata_prefix, from_date=from_date,
                                          until_date=until_date, oai_set=oai_set, start_number=next_start,
                                          start_after=next_start_after)

        # build the response object for the requested verb
        response_class = ListIdentifiers if headers_only else ListRecords
        lst = response_class(base_url, from_date=from_date, until_date=until_date, oai_set=oai_set,
                             metadata_prefix=metadata_prefix)

        if token is not None:
            expiry = app.config.get("OAIPMH_RESUMPTION_TOKEN_EXPIRY", -1)
            lst.set_resumption(token, complete_list_size=full_total, cursor=next_start, expiry=expiry)

        for result in results:
            # crosswalk each result into its header (and metadata, for ListRecords)
            xwalk = get_crosswalk(fmt.get("metadataPrefix"), dao.__type__)
            header = xwalk.header(result)
            if headers_only:
                lst.add_record(header)
            else:
                metadata = xwalk.crosswalk(result)
                lst.add_record(metadata, header)
        return lst

    # none of the configured formats matched the requested prefix
    return CannotDisseminateFormat(base_url)
def _parameterised_list_identifiers(dao, base_url, specified_oai_endpoint, metadata_prefix=None, from_date=None, until_date=None, oai_set=None, start_number=0, start_after=None):
    """Run ``_parameterised_list`` in ``'identifiers'`` mode."""
    return _parameterised_list(
        'identifiers', dao, base_url, specified_oai_endpoint,
        metadata_prefix=metadata_prefix,
        from_date=from_date,
        until_date=until_date,
        oai_set=oai_set,
        start_number=start_number,
        start_after=start_after,
    )
def _resume_list_identifiers(dao, base_url, specified_oai_endpoint, resumption_token=None):
    """Continue a ListIdentifiers request from a resumption token.

    :return: the next page, or ``BadResumptionToken`` if the token cannot be
        decoded
    """
    try:
        resumed_params = decode_resumption_token(resumption_token)
    except ResumptionTokenException:
        return BadResumptionToken(base_url)
    else:
        return _parameterised_list_identifiers(dao, base_url, specified_oai_endpoint, **resumed_params)
def list_metadata_formats(dao, base_url, specified_oai_endpoint, identifier=None):
    """Implement the ListMetadataFormats verb.

    :param dao: data access object used to check the identifier
    :param base_url: the request's base URL, echoed in the response
    :param specified_oai_endpoint: key into ``OAIPMH_METADATA_FORMATS``
    :param identifier: optional internal record id
    :return: a ``ListMetadataFormats`` response, ``IdDoesNotExist`` for an
        unknown identifier, or ``NoMetadataFormats`` if none are configured
    """
    has_identifier = identifier is not None

    # an identifier, when given, must exist
    if has_identifier and not dao.identifier_exists(identifier):
        return IdDoesNotExist(base_url)

    # There should always be configured formats; if the service is
    # mis-configured this produces the correct protocol error.
    formats = app.config.get("OAIPMH_METADATA_FORMATS", {}).get(specified_oai_endpoint)
    if formats is None or len(formats) == 0:
        return NoMetadataFormats(base_url)

    # build the response
    oai_id = make_oai_identifier(identifier, dao.__type__) if has_identifier else None
    response = ListMetadataFormats(base_url=base_url, identifier=oai_id)
    for fmt in formats:
        response.add_format(fmt.get("metadataPrefix"), fmt.get("schema"), fmt.get("metadataNamespace"))
    return response
def list_records(dao, base_url, specified_oai_endpoint, metadata_prefix=None, from_date=None, until_date=None, oai_set=None, resumption_token=None):
    """Implement the ListRecords verb.

    Without a resumption token this starts a new list; with one it continues
    an earlier list, in which case no other argument may be supplied.
    """
    is_initial_request = resumption_token is None
    if is_initial_request:
        return _parameterised_list_records(
            dao, base_url, specified_oai_endpoint,
            metadata_prefix=metadata_prefix, from_date=from_date,
            until_date=until_date, oai_set=oai_set,
        )

    # resumption of a previous request: the token has to stand alone
    token_stands_alone = (metadata_prefix is None and from_date is None
                          and until_date is None and oai_set is None)
    if not token_stands_alone:
        return BadArgument(base_url)
    return _resume_list_records(dao, base_url, specified_oai_endpoint, resumption_token=resumption_token)
def _parameterised_list_records(dao, base_url, specified_oai_endpoint, metadata_prefix=None, from_date=None, until_date=None, oai_set=None, start_number=0, start_after=None):
    """Run ``_parameterised_list`` in ``'records'`` mode."""
    return _parameterised_list(
        'records', dao, base_url, specified_oai_endpoint,
        metadata_prefix=metadata_prefix,
        from_date=from_date,
        until_date=until_date,
        oai_set=oai_set,
        start_number=start_number,
        start_after=start_after,
    )
def _resume_list_records(dao, base_url, specified_oai_endpoint, resumption_token=None):
    """Continue a ListRecords request from a resumption token.

    :return: the next page, or ``BadResumptionToken`` if the token cannot be
        decoded
    """
    try:
        resumed_params = decode_resumption_token(resumption_token)
    except ResumptionTokenException:
        return BadResumptionToken(base_url)
    else:
        return _parameterised_list_records(dao, base_url, specified_oai_endpoint, **resumed_params)
def list_sets(dao, base_url, resumption_token=None):
    """Implement the ListSets verb.

    Resumption tokens are not supported for this operation, so supplying one
    yields ``BadResumptionToken``.
    """
    if resumption_token is not None:
        return BadResumptionToken(base_url)

    # Each set the DAO reports is listed under its own name, with the
    # setSpec derived from that same name.
    response = ListSets(base_url)
    for set_name in dao.list_sets():
        response.add_set(make_set_spec(set_name), set_name)
    return response
#####################################################################
# Objects
#####################################################################
class OAI_PMH(object):
    """Base class for every OAI-PMH response (successful or error).

    Subclasses set ``verb``, implement ``get_element`` to provide the
    verb-specific payload and may override ``add_request_attributes`` to echo
    request arguments on the ``request`` element.
    """

    VERSION = "2.0"

    PMH_NAMESPACE = "http://www.openarchives.org/OAI/2.0/"
    PMH = "{" + PMH_NAMESPACE + "}"

    XSI_NAMESPACE = "http://www.w3.org/2001/XMLSchema-instance"
    XSI = "{" + XSI_NAMESPACE + "}"

    NSMAP = {None : PMH_NAMESPACE, "xsi" : XSI_NAMESPACE}

    def __init__(self, base_url):
        self.base_url = base_url
        self.verb = None

    def _to_xml(self):
        """Build the complete ``OAI-PMH`` envelope around the payload element."""
        root = etree.Element(self.PMH + "OAI-PMH", nsmap=self.NSMAP)
        root.set(self.XSI + "schemaLocation",
                 "http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd")

        etree.SubElement(root, self.PMH + "responseDate").text = get_response_date()

        # the request element echoes the base url, the verb and any arguments
        request_el = etree.SubElement(root, self.PMH + "request")
        if self.verb is not None:
            request_el.set("verb", self.verb)
        request_el.text = self.base_url
        self.add_request_attributes(request_el)

        root.append(self.get_element())
        return root

    def serialise(self):
        """Return the response as UTF-8 encoded XML with an XML declaration."""
        return etree.tostring(self._to_xml(), xml_declaration=True, encoding="UTF-8")

    def get_element(self):
        """Return the verb-specific payload element; subclasses must override."""
        raise NotImplementedError()

    def add_request_attributes(self, element):
        """Hook for subclasses to add attributes to the ``request`` element."""
        return
class GetRecord(OAI_PMH):
    """Response to the GetRecord verb.

    ``header`` and ``metadata`` must be set to XML elements before the
    response is serialised.
    """

    def __init__(self, base_url, identifier, metadata_prefix):
        super().__init__(base_url)
        self.verb = "GetRecord"
        self.identifier = identifier
        self.metadata_prefix = metadata_prefix
        self.metadata = None
        self.header = None

    def get_element(self):
        """Return the ``GetRecord`` element wrapping one record."""
        wrapper = etree.Element(self.PMH + "GetRecord", nsmap=self.NSMAP)
        record = etree.SubElement(wrapper, self.PMH + "record")

        # header first, then metadata
        for part in (self.header, self.metadata):
            record.append(part)

        return wrapper

    def add_request_attributes(self, element):
        """Echo the identifier and metadataPrefix arguments, where present."""
        echoed = (("identifier", self.identifier), ("metadataPrefix", self.metadata_prefix))
        for attribute, value in echoed:
            if value is not None:
                element.set(attribute, value)
class Identify(OAI_PMH):
    """Response to the Identify verb.

    ``earliest_datestamp`` may be set after construction; when it is left as
    None the default earliest datestamp from ``DateFormat`` is reported.
    """

    def __init__(self, base_url, repo_name, admin_email):
        super(Identify, self).__init__(base_url)
        self.verb = "Identify"
        self.repo_name = repo_name
        self.admin_email = admin_email
        self.earliest_datestamp = None

    def get_element(self):
        """Return the ``Identify`` element describing this repository."""
        identify = etree.Element(self.PMH + "Identify", nsmap=self.NSMAP)

        repo_name = etree.SubElement(identify, self.PMH + "repositoryName")
        repo_name.text = self.repo_name

        base = etree.SubElement(identify, self.PMH + "baseURL")
        base.text = self.base_url

        protocol = etree.SubElement(identify, self.PMH + "protocolVersion")
        protocol.text = self.VERSION

        admin_email = etree.SubElement(identify, self.PMH + "adminEmail")
        admin_email.text = self.admin_email

        earliest = etree.SubElement(identify, self.PMH + "earliestDatestamp")
        if self.earliest_datestamp is not None:
            earliest.text = self.earliest_datestamp
        else:
            # Fall back to the default. The value has to be assigned:
            # previously it was computed and discarded, which left the
            # element empty.
            earliest.text = DateFormat.default_earliest()

        deletes = etree.SubElement(identify, self.PMH + "deletedRecord")
        deletes.text = "transient"  # keep the door open

        granularity = etree.SubElement(identify, self.PMH + "granularity")
        granularity.text = DateFormat.granularity()

        return identify
class ListIdentifiers(OAI_PMH):
    """Response to the ListIdentifiers verb: a list of record headers plus an
    optional resumption token."""

    def __init__(self, base_url, from_date=None, until_date=None, oai_set=None, metadata_prefix=None):
        super(ListIdentifiers, self).__init__(base_url)
        self.verb = "ListIdentifiers"
        self.from_date = from_date
        self.until_date = until_date
        self.oai_set = oai_set
        self.metadata_prefix = metadata_prefix
        self.records = []
        self.resumption = None

    def set_resumption(self, resumption_token, complete_list_size=None, cursor=None, expiry=-1):
        """Attach a resumption token to the response.

        :param resumption_token: token text (empty for the last page of a list)
        :param complete_list_size: optional size of the complete list
        :param cursor: optional count of records served so far
        :param expiry: token lifetime in seconds; a negative value means no
            expirationDate is emitted
        """
        self.resumption = {"resumption_token" : resumption_token, "expiry" : expiry}
        if complete_list_size is not None:
            self.resumption["complete_list_size"] = complete_list_size
        if cursor is not None:
            self.resumption["cursor"] = cursor

    def add_record(self, header):
        """Append a record header element to the response."""
        self.records.append(header)

    def add_request_attributes(self, element):
        """Echo the from/until/set/metadataPrefix arguments, where present."""
        if self.from_date is not None:
            element.set("from", self.from_date)
        if self.until_date is not None:
            element.set("until", self.until_date)
        if self.oai_set is not None:
            element.set("set", self.oai_set)
        if self.metadata_prefix is not None:
            element.set("metadataPrefix", self.metadata_prefix)

    def get_element(self):
        """Return the ``ListIdentifiers`` element with headers and token."""
        lr = etree.Element(self.PMH + "ListIdentifiers", nsmap=self.NSMAP)

        for header in self.records:
            lr.append(header)

        if self.resumption is not None:
            rt = etree.SubElement(lr, self.PMH + "resumptionToken")
            if "complete_list_size" in self.resumption:
                rt.set("completeListSize", str(self.resumption.get("complete_list_size")))
            if "cursor" in self.resumption:
                rt.set("cursor", str(self.resumption.get("cursor")))
            expiry = self.resumption.get("expiry", -1)
            if expiry >= 0:
                # The formatted value carries a "Z" (UTC) suffix, so the
                # expiry must be computed from the current UTC time rather
                # than the server's local time.
                expires_at = datetime.now(timezone.utc) + timedelta(seconds=expiry)
                rt.set("expirationDate", DateFormat.format(expires_at))
            rt.text = self.resumption.get("resumption_token")

        return lr
class ListMetadataFormats(OAI_PMH):
    """Response to the ListMetadataFormats verb."""

    # child elements of each metadataFormat, in output order; the same names
    # are used as keys of the dicts held in ``self.formats``
    _FORMAT_FIELDS = ("metadataPrefix", "schema", "metadataNamespace")

    def __init__(self, base_url, identifier=None):
        super().__init__(base_url)
        self.verb = "ListMetadataFormats"
        self.identifier = identifier
        self.formats = []

    def add_format(self, metadata_prefix, schema, metadata_namespace):
        """Add one metadata format to the response."""
        entry = {
            "metadataPrefix": metadata_prefix,
            "schema": schema,
            "metadataNamespace": metadata_namespace
        }
        self.formats.append(entry)

    def add_request_attributes(self, element):
        """Echo the identifier argument, where present."""
        if self.identifier is None:
            return
        element.set("identifier", self.identifier)

    def get_element(self):
        """Return the ``ListMetadataFormats`` element, one child per format."""
        container = etree.Element(self.PMH + "ListMetadataFormats", nsmap=self.NSMAP)

        for fmt in self.formats:
            format_el = etree.SubElement(container, self.PMH + "metadataFormat")
            for field in self._FORMAT_FIELDS:
                field_el = etree.SubElement(format_el, self.PMH + field)
                field_el.text = fmt.get(field)

        return container
class ListRecords(OAI_PMH):
    """Response to the ListRecords verb: a list of (metadata, header) records
    plus an optional resumption token."""

    def __init__(self, base_url, from_date=None, until_date=None, oai_set=None, metadata_prefix=None):
        super(ListRecords, self).__init__(base_url)
        self.verb = "ListRecords"
        self.from_date = from_date
        self.until_date = until_date
        self.oai_set = oai_set
        self.metadata_prefix = metadata_prefix
        self.records = []
        self.resumption = None
        # not read anywhere in this class; kept for backward compatibility
        self.resumption_expiry = -1

    def set_resumption(self, resumption_token, complete_list_size=None, cursor=None, expiry=-1):
        """Attach a resumption token to the response.

        :param resumption_token: token text (empty for the last page of a list)
        :param complete_list_size: optional size of the complete list
        :param cursor: optional count of records served so far
        :param expiry: token lifetime in seconds; a negative value means no
            expirationDate is emitted
        """
        self.resumption = {"resumption_token" : resumption_token, "expiry" : expiry}
        if complete_list_size is not None:
            self.resumption["complete_list_size"] = complete_list_size
        if cursor is not None:
            self.resumption["cursor"] = cursor

    def add_record(self, metadata, header):
        """Append a record, given its metadata and header elements."""
        self.records.append((metadata, header))

    def add_request_attributes(self, element):
        """Echo the from/until/set/metadataPrefix arguments, where present."""
        if self.from_date is not None:
            element.set("from", self.from_date)
        if self.until_date is not None:
            element.set("until", self.until_date)
        if self.oai_set is not None:
            element.set("set", self.oai_set)
        if self.metadata_prefix is not None:
            element.set("metadataPrefix", self.metadata_prefix)

    def get_element(self):
        """Return the ``ListRecords`` element with records and token."""
        lr = etree.Element(self.PMH + "ListRecords", nsmap=self.NSMAP)

        for metadata, header in self.records:
            r = etree.SubElement(lr, self.PMH + "record")
            # header precedes metadata within each record
            r.append(header)
            r.append(metadata)

        if self.resumption is not None:
            rt = etree.SubElement(lr, self.PMH + "resumptionToken")
            if "complete_list_size" in self.resumption:
                rt.set("completeListSize", str(self.resumption.get("complete_list_size")))
            if "cursor" in self.resumption:
                rt.set("cursor", str(self.resumption.get("cursor")))
            expiry = self.resumption.get("expiry", -1)
            if expiry >= 0:
                # The formatted value carries a "Z" (UTC) suffix, so the
                # expiry must be computed from the current UTC time rather
                # than the server's local time.
                expires_at = datetime.now(timezone.utc) + timedelta(seconds=expiry)
                rt.set("expirationDate", DateFormat.format(expires_at))
            rt.text = self.resumption.get("resumption_token")

        return lr
class ListSets(OAI_PMH):
    """Response to the ListSets verb."""

    def __init__(self, base_url):
        super().__init__(base_url)
        self.verb = "ListSets"
        self.sets = []

    def add_set(self, spec, name):
        """Add a set, given its setSpec and its setName."""
        self.sets.append((spec, name))

    def get_element(self):
        """Return the ``ListSets`` element, one ``set`` child per set."""
        container = etree.Element(self.PMH + "ListSets", nsmap=self.NSMAP)

        for spec, name in self.sets:
            set_el = etree.SubElement(container, self.PMH + "set")
            etree.SubElement(set_el, self.PMH + "setSpec").text = spec
            etree.SubElement(set_el, self.PMH + "setName").text = name

        return container
#####################################################################
# Error Handling
#####################################################################
class OAIPMHError(OAI_PMH):
    """Base class for OAI-PMH error responses.

    Subclasses set ``code`` and ``description``; both are optional in the
    serialised ``error`` element.
    """

    def __init__(self, base_url):
        super().__init__(base_url)
        self.code = None
        self.description = None

    def get_element(self):
        """Return the ``error`` element for this response."""
        error_el = etree.Element(self.PMH + "error", nsmap=self.NSMAP)

        code, description = self.code, self.description
        if code is not None:
            error_el.set("code", code)
        if description is not None:
            error_el.text = description

        return error_el
class BadArgument(OAIPMHError):
    """The ``badArgument`` protocol error."""

    def __init__(self, base_url):
        super().__init__(base_url)
        self.code = "badArgument"
        self.description = "The request includes illegal arguments, is missing required arguments, includes a repeated argument, or values for arguments have an illegal syntax."
class BadResumptionToken(OAIPMHError):
    """The ``badResumptionToken`` protocol error."""

    def __init__(self, base_url):
        super().__init__(base_url)
        self.code = "badResumptionToken"
        self.description = "The value of the resumptionToken argument is invalid or expired."
class BadVerb(OAIPMHError):
    """The ``badVerb`` protocol error."""

    def __init__(self, base_url):
        super().__init__(base_url)
        self.code = "badVerb"
        self.description = "Value of the verb argument is not a legal OAI-PMH verb, the verb argument is missing, or the verb argument is repeated."
class CannotDisseminateFormat(OAIPMHError):
    """The ``cannotDisseminateFormat`` protocol error."""

    def __init__(self, base_url):
        super().__init__(base_url)
        self.code = "cannotDisseminateFormat"
        self.description = "The metadata format identified by the value given for the metadataPrefix argument is not supported by the item or by the repository."
class IdDoesNotExist(OAIPMHError):
    """The ``idDoesNotExist`` protocol error."""

    def __init__(self, base_url):
        super().__init__(base_url)
        self.code = "idDoesNotExist"
        self.description = "The value of the identifier argument is unknown or illegal in this repository."
class NoRecordsMatch(OAIPMHError):
    """The ``noRecordsMatch`` protocol error."""

    def __init__(self, base_url):
        super().__init__(base_url)
        self.code = "noRecordsMatch"
        self.description = "The combination of the values of the from, until, set and metadataPrefix arguments results in an empty list."
class NoMetadataFormats(OAIPMHError):
    """The ``noMetadataFormats`` protocol error."""

    def __init__(self, base_url):
        super().__init__(base_url)
        self.code = "noMetadataFormats"
        self.description = "There are no metadata formats available for the specified item."
class NoSetHierarchy(OAIPMHError):
    """The ``noSetHierarchy`` protocol error."""

    def __init__(self, base_url):
        super().__init__(base_url)
        self.code = "noSetHierarchy"
        self.description = "The repository does not support sets."