Coverage for portality / regex.py: 89%

19 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-04 09:41 +0100

1import re 

2 

3#~~DOI:Regex~~ 

# DOI / handle identifier, optionally preceded by a resolver URL or a scheme prefix.
# The fragments below are joined by implicit string concatenation into one pattern.
DOI = (
    r"^("
    r"(https?://)?((dx\.)?doi\.org/|hdl\.handle\.net/)"  # resolver host, scheme optional
    r"|doi:|info:doi/|info:hdl/"  # or a bare scheme-style prefix
    r")?"  # the whole prefix is optional
    r"(?P<id>10\.\S+/\S+)$"  # the identifier itself, exposed as the named group `id`
)

# Compiled form; matching is case-insensitive.
DOI_COMPILED = re.compile(DOI, flags=re.IGNORECASE)

6 

7#~~ORCID:Regex~~ 

# ORCID iD in its full https URL form: four hyphen-separated groups of four
# characters, the very last character being a digit or the checksum letter `X`.
# Explicit `[0-9]` is used throughout because `\d` on a str pattern also matches
# non-ASCII Unicode decimal digits, which are not valid in an ORCID iD.
ORCID = r"^https://orcid\.org/[0-9]{4}-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]$"

# Compiled form of ORCID (case-sensitive).
ORCID_COMPILED = re.compile(ORCID)

10 

11#~~ISSN:Regex~~ 

# ISSN: four digits, a hyphen, three digits, then a check character that is a
# digit or `X`/`x`. Explicit `[0-9]` is used because `\d` on a str pattern also
# matches non-ASCII Unicode decimal digits. The check character stays in a
# capturing group so group numbering is unchanged for existing callers.
ISSN = r'^[0-9]{4}-[0-9]{3}([0-9]|X|x){1}$'

# Compiled form of ISSN.
ISSN_COMPILED = re.compile(ISSN)

14 

15#~~Date:Regex~~ 

# Big-endian date shape: 4 digits, 2 digits, 2 digits, hyphen-separated.
# Only the shape is checked; the pattern does not range-check the fields.
# Explicit `[0-9]` is used because `\d` on a str pattern also matches
# non-ASCII Unicode decimal digits.
BIG_END_DATE = r'^[0-9]{4}-[0-9]{2}-[0-9]{2}$'

# Compiled form of BIG_END_DATE.
BIG_END_DATE_COMPILED = re.compile(BIG_END_DATE)

18 

19#~~URL:Regex~~ 

# Absolute http/https URL. The fragments are joined by implicit string
# concatenation into a single pattern.
HTTP_URL = (
    r'^(?:https?)://'  # Scheme: http or https only
    r'(?:[\w\-]+\.)*[\w\-]+'  # Domain name (optional subdomains)
    r'(?:\.[a-z]{2,})'  # Top-level domain (e.g., .com, .org)
    # Optional port (0-65535) preceded with `:`. The alternatives step down
    # from 65535 one leading-digit prefix at a time; constraining each digit
    # independently would wrongly reject valid ports such as 60999.
    r'(?::(0|6553[0-5]|655[0-2][0-9]|65[0-4][0-9]{2}|6[0-4][0-9]{3}'
    r'|[1-5][0-9]{4}|[1-9][0-9]{0,3}))?'
    r'(?:\/[^\/\s]*)*'  # Path (optional)
    r'(?:\?[^\/\s]*)?'  # Query string (optional)
    r'(?:#[^\/\s]*)?$'  # Fragment (optional)
)

# Compiled form; matching is case-insensitive.
HTTP_URL_COMPILED = re.compile(HTTP_URL, re.IGNORECASE)

31 

32 

def is_match(pattern, string, *args, **kwargs):
    """Report whether `pattern` matches at the start of `string`.

    Any extra positional or keyword arguments are passed straight through
    to `re.match`.

    :param pattern: the pattern handed to `re.match`
    :param string: the text to test
    :return: True when `re.match` finds a match, False otherwise
    """
    return re.match(pattern, string, *args, **kwargs) is not None

36 

37 

def group_match(pattern, string, name, *args, **kwargs):
    """Match `pattern` at the start of `string` and extract one group.

    Any extra positional or keyword arguments are passed straight through
    to `re.match`.

    :param pattern: the pattern handed to `re.match`
    :param string: the text to test
    :param name: the group to read from the match, passed to `match.group`
    :return: the content of the requested group, or None when there is no match
    """
    found = re.match(pattern, string, *args, **kwargs)
    return found.group(name) if found is not None else None