Coverage for portality / lib / ris.py: 94%
80 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-04 09:41 +0100
« prev ^ index » next coverage.py v7.13.5, created at 2026-05-04 09:41 +0100
1"""
A very simple library for reading and writing the RIS citation format.

File format reference: https://en.wikipedia.org/wiki/RIS_(file_format)
5"""
6import collections
7import logging
8from collections import OrderedDict
9from typing import Dict, Optional
# Module-level logger named after this module.
log = logging.getLogger(__name__)

# Tag holding the type of reference; RisEntry.to_text() writes it first.
RTAG_TYPE = "TY"
# Tag closing a record; RisEntry.from_text() stops reading at it and
# RisEntry.to_text() always writes it last. It is not listed in RIS_TAGS.
RTAG_END = "ER"

# Every field tag accepted by find_tag(), in alphabetical-ish order.
# The trailing comment on each entry is the tag's conventional meaning.
RIS_TAGS = [
    # --- A ---
    "A1",  # primary author
    "A2",  # secondary author
    "A3",  # tertiary author
    "A4",  # quaternary author
    "A5",  # quinary author / compiler
    "A6",  # website editor
    "AB",  # abstract / synopsis
    "AD",  # author / editor address
    "AN",  # accession number
    "AU",  # author / editor / translator
    "AV",  # availability / location
    # --- B ---
    "BT",  # primary / secondary title
    # --- C ---
    *(f"C{n}" for n in range(1, 9)),  # C1..C8: custom1 .. custom8
    "CA",  # caption
    "CL",  # classification
    "CN",  # call number
    "CP",  # city / place of publication
    "CR",  # cited references
    "CT",  # caption / primary title
    "CY",  # place published
    # --- D ---
    "DA",  # date
    "DB",  # name of database
    "DI",  # digital object identifier
    "DO",  # digital object identifier (2)
    "DOI",  # digital object identifier (3)
    "DP",  # database provider
    "DS",  # data source
    # --- E / F ---
    "ED",  # secondary author
    "EP",  # end page
    "ET",  # edition
    "FD",  # free-form publication data
    # --- H / I ---
    "H1",  # location (library)
    "H2",  # location (call number)
    "ID",  # reference identifier
    "IP",  # identifying phrase
    "IS",  # number of volumes
    # --- J / K ---
    "J1",  # journal abbreviation (1)
    "J2",  # alternate title
    "JA",  # journal standard abbreviation
    "JF",  # journal full name
    "JO",  # journal abbreviation
    "K1",  # keyword (1)
    "KW",  # keyword / phrase
    # --- L ---
    "L1",  # file attachments
    "L2",  # URL link
    "L3",  # DOI link
    "L4",  # figure / image link
    "LA",  # language
    "LB",  # label
    "LK",  # links
    "LL",  # sponsoring library location
    # --- M / N ---
    "M1",  # miscellaneous (1)
    "M2",  # miscellaneous (2)
    "M3",  # type of work
    "N1",  # notes (1)
    "N2",  # abstract / notes
    "NO",  # notes
    "NV",  # number of volumes
    # --- O / P ---
    "OL",  # output language
    "OP",  # original publication
    "PA",  # personal notes
    "PB",  # publisher
    "PMCID",  # PMCID
    "PMID",  # PMID
    "PP",  # place of publication
    "PY",  # publication year
    # --- R ---
    "RD",  # retrieved date
    "RI",  # reviewed item
    "RN",  # research notes
    "RP",  # reprint status
    "RT",  # reference type
    # --- S ---
    "SE",  # section
    "SF",  # subfile / database
    "SL",  # sponsoring library
    "SN",  # ISSN / ISBN
    "SP",  # start page(s)
    "SR",  # source type
    "ST",  # short title
    "SV",  # series volume
    # --- T ---
    "T1",  # primary title
    "T2",  # secondary title
    "T3",  # tertiary title
    "TA",  # translated author
    "TI",  # title
    "TT",  # translated title
    RTAG_TYPE,  # type of reference
    # --- U ---
    *(f"U{n}" for n in range(1, 16)),  # U1..U15: user_definable1 .. 15
    "UR",  # web URL
    # --- V / W ---
    "VL",  # volume
    "VO",  # volume / published standard
    "WP",  # date of electronic publication
    "WT",  # website title
    "WV",  # website version
    # --- Y ---
    "Y1",  # year / date
    "Y2",  # access date / secondary date
    "YR",  # publication year (ref)
]
def find_tag(field_name) -> Optional[str]:
    """Normalise a field name to its upper-case RIS tag.

    :param field_name: tag name in any letter case, e.g. ``'ty'`` or ``'AU'``
    :return: the upper-cased name when it is listed in ``RIS_TAGS``
    :raises ValueError: when the upper-cased name is not in ``RIS_TAGS``
    """
    candidate = field_name.upper()
    # Guard clause: reject unknown tags up front, report the normalised form.
    if candidate not in RIS_TAGS:
        raise ValueError(f'Field not found: {candidate}')
    return candidate
class RisEntry:
    """One RIS record, stored as a mapping of RIS tag to a list of values.

    Every field name passed to the item-access methods goes through
    ``find_tag``, so names are case-insensitive and a name that is not in
    ``RIS_TAGS`` raises ``ValueError``.
    """

    def __init__(self, type_of_reference: Optional[str] = None):
        """Create an empty entry, optionally setting its type of reference.

        :param type_of_reference: value for the ``TY`` tag; ignored when falsy
        """
        # defaultdict: reading a tag that was never set yields a new empty
        # list that is stored in the mapping, so callers can append to it.
        self.data: collections.defaultdict[str, list] = collections.defaultdict(list)
        if type_of_reference:
            self.type = type_of_reference

    def __setitem__(self, field_name, value):
        """Replace all values of ``field_name`` with the single ``value``.

        :raises ValueError: if ``field_name`` is not a known tag
        """
        tag = find_tag(field_name)
        self.data[tag] = [value]

    def append(self, tag, value) -> list:
        """Add ``value`` to the values of ``tag``.

        :return: the (live) list of values now stored for the tag
        :raises ValueError: if ``tag`` is not a known tag
        """
        values = self[tag]
        values.append(value)
        return values

    def __getitem__(self, field_name) -> list:
        """Return the live list of values stored for ``field_name``.

        An unset tag yields an empty list, which is stored as a side effect.

        :raises ValueError: if ``field_name`` is not a known tag
        """
        tag = find_tag(field_name)
        return self.data[tag]

    @property
    def type(self):
        """First value of the ``TY`` tag.

        When no type has been set, the (falsy) empty list is returned rather
        than ``None``; this is kept for backward compatibility.
        """
        values = self[RTAG_TYPE]
        return values and values[0]

    @type.setter
    def type(self, value):
        self[RTAG_TYPE] = value

    @classmethod
    def from_dict(cls, d: dict):
        """Build an entry from a ``{field_name: value-or-list}`` mapping.

        A list value contributes each of its items; any other value is added
        as a single item.

        :raises ValueError: if a key is not a known tag
        """
        instance = cls()
        for field_name, raw in d.items():
            items = raw if isinstance(raw, list) else [raw]
            for item in items:
                instance[field_name].append(item)
        return instance

    @classmethod
    def from_text(cls, text: str):
        """Parse the text of one RIS record into an entry.

        Each non-blank line is split at its first ``-`` into a tag and a
        value; the literal two-character sequence ``\\n`` in a value is
        turned back into a real newline. Reading stops at the ``ER`` tag.

        :param text: RIS text for a single record
        :return: a new instance of ``cls``
        :raises ValueError: if a non-blank line has no ``-`` separator, or
            its tag is not a known tag
        """
        # cls() rather than RisEntry() so subclasses get their own type,
        # matching from_dict().
        entry = cls()
        for line in text.strip().splitlines():
            if not line.strip():
                # Blank lines carry no field; skip them instead of failing
                # on the tag/value split.
                continue
            tag, separator, value = line.partition('-')
            if not separator:
                raise ValueError(f'Invalid RIS line, missing "-" separator: {line!r}')
            tag = tag.strip()
            if tag == RTAG_END:
                break
            entry[tag].append(value.lstrip().replace('\\n', '\n'))
        return entry

    def to_text(self) -> str:
        """Serialise the entry to RIS text.

        The ``TY`` tag, when present, is written first; the remaining tags
        follow in insertion order, one line per value, and an ``ER`` line is
        always written last. Newlines inside a value are written as the
        literal sequence ``\\n``; ``None`` is written as an empty value and
        other non-string values are converted with ``str()``.
        """
        # ER is excluded here because it is always emitted once at the end.
        tags = [t for t in self.data if t not in (RTAG_TYPE, RTAG_END)]
        if RTAG_TYPE in self.data:
            tags.insert(0, RTAG_TYPE)

        def _to_line(tag, value):
            # None must be handled before any string operation on the value.
            if value is None:
                value = ''
            else:
                value = str(value).replace('\n', '\\n')
            # NOTE(review): the RIS format is usually written with two spaces
            # before the hyphen ("TY  - "); confirm the single space here is
            # intended.
            return f'{tag} - {value}\n'

        lines = [_to_line(tag, v) for tag in tags for v in self.data[tag]]
        lines.append(_to_line(RTAG_END, ''))
        return ''.join(lines)