Coverage for portality / lib / ris.py: 94%

80 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-05-04 09:41 +0100

1""" 

2very simple library for RIS format 

3 

4file format references: https://en.wikipedia.org/wiki/RIS_(file_format) 

5""" 

6import collections 

7import logging 

8from collections import OrderedDict 

9from typing import Dict, Optional 

10 

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Tag that carries the reference type of a record.
RTAG_TYPE = 'TY'
# Tag that terminates a record; deliberately NOT part of RIS_TAGS below.
RTAG_END = 'ER'

# Every tag accepted as a field of an entry. Kept as a list, in this order.
RIS_TAGS = [
    # --- A* ---
    'A1',  # primary author
    'A2',  # secondary author
    'A3',  # tertiary author
    'A4',  # quaternary author
    'A5',  # quinary author / compiler
    'A6',  # website editor
    'AB',  # abstract / synopsis
    'AD',  # author / editor address
    'AN',  # accession number
    'AU',  # author / editor / translator
    'AV',  # availability / location
    # --- B* ---
    'BT',  # primary / secondary title
    # --- C* ---
    'C1',  # custom 1
    'C2',  # custom 2
    'C3',  # custom 3
    'C4',  # custom 4
    'C5',  # custom 5
    'C6',  # custom 6
    'C7',  # custom 7
    'C8',  # custom 8
    'CA',  # caption
    'CL',  # classification
    'CN',  # call number
    'CP',  # city / place of publication
    'CR',  # cited references
    'CT',  # caption / primary title
    'CY',  # place published
    # --- D* ---
    'DA',  # date
    'DB',  # name of database
    'DI',  # digital object identifier
    'DO',  # digital object identifier (2)
    'DOI',  # digital object identifier (3)
    'DP',  # database provider
    'DS',  # data source
    # --- E* / F* ---
    'ED',  # secondary author
    'EP',  # end page
    'ET',  # edition
    'FD',  # free-form publication data
    # --- H* / I* ---
    'H1',  # location (library)
    'H2',  # location (call number)
    'ID',  # reference identifier
    'IP',  # identifying phrase
    'IS',  # number volumes
    # --- J* / K* ---
    'J1',  # journal abbreviation 1
    'J2',  # alternate title
    'JA',  # journal standard abbreviation
    'JF',  # journal full name
    'JO',  # journal abbreviation
    'K1',  # keyword 1
    'KW',  # keyword / phrase
    # --- L* ---
    'L1',  # file attachments
    'L2',  # URL link
    'L3',  # DOI link
    'L4',  # figure / image link
    'LA',  # language
    'LB',  # label
    'LK',  # links
    'LL',  # sponsoring library location
    # --- M* / N* ---
    'M1',  # miscellaneous 1
    'M2',  # miscellaneous 2
    'M3',  # type of work
    'N1',  # notes 1
    'N2',  # abstract notes
    'NO',  # notes
    'NV',  # number of volumes
    # --- O* / P* ---
    'OL',  # output language
    'OP',  # original publication
    'PA',  # personal notes
    'PB',  # publisher
    'PMCID',  # PMCID
    'PMID',  # PMID
    'PP',  # place of publication
    'PY',  # publication year
    # --- R* ---
    'RD',  # retrieved date
    'RI',  # reviewed item
    'RN',  # research notes
    'RP',  # reprint status
    'RT',  # reference type
    # --- S* ---
    'SE',  # section
    'SF',  # subfile / database
    'SL',  # sponsoring library
    'SN',  # ISSN / ISBN
    'SP',  # start page(s)
    'SR',  # source type
    'ST',  # short title
    'SV',  # series volume
    # --- T* ---
    'T1',  # primary title
    'T2',  # secondary title
    'T3',  # tertiary title
    'TA',  # translated author
    'TI',  # title
    'TT',  # translated title
    RTAG_TYPE,  # type of reference ('TY')
    # --- U* ---
    'U1',  # user definable 1
    'U2',  # user definable 2
    'U3',  # user definable 3
    'U4',  # user definable 4
    'U5',  # user definable 5
    'U6',  # user definable 6
    'U7',  # user definable 7
    'U8',  # user definable 8
    'U9',  # user definable 9
    'U10',  # user definable 10
    'U11',  # user definable 11
    'U12',  # user definable 12
    'U13',  # user definable 13
    'U14',  # user definable 14
    'U15',  # user definable 15
    'UR',  # web URL
    # --- V* / W* / Y* ---
    'VL',  # volume
    'VO',  # volume (published standard)
    'WP',  # date of electronic publication
    'WT',  # website title
    'WV',  # website version
    'Y1',  # year / date
    'Y2',  # access date / secondary date
    'YR',  # publication year (ref)
]

134 

135 

def find_tag(field_name: str) -> str:
    """Return the canonical (upper-case) RIS tag for ``field_name``.

    The lookup is case-insensitive: ``field_name`` is upper-cased and then
    checked for membership in ``RIS_TAGS``.

    :param field_name: tag name in any letter case, e.g. ``'au'`` or ``'AU'``
    :return: the upper-cased tag; this function never returns ``None``
    :raises ValueError: if the upper-cased name is not listed in ``RIS_TAGS``
        (the message contains the upper-cased name)
    """
    tag = field_name.upper()
    if tag not in RIS_TAGS:
        raise ValueError(f'Field not found: {tag}')
    return tag

141 

142 

class RisEntry:
    """One RIS record, stored as a mapping of RIS tag -> list of values.

    Field names are validated and upper-cased through ``find_tag`` on every
    access, so ``entry['au']`` and ``entry['AU']`` address the same list and
    an unknown tag raises ``ValueError``.
    """

    def __init__(self, type_of_reference: Optional[str] = None):
        """Create an empty entry, optionally setting its reference type.

        :param type_of_reference: value for the ``TY`` tag; ignored when falsy
        """
        # defaultdict(list): looking up a tag that has no values yet inserts
        # and returns an empty list, which callers then append to.
        self.data: collections.defaultdict[str, list] = collections.defaultdict(list)
        if type_of_reference:
            self.type = type_of_reference

    def __setitem__(self, field_name, value):
        """Replace all values of ``field_name`` with the single ``value``.

        :raises ValueError: if ``field_name`` is not a known tag
        """
        tag = find_tag(field_name)
        self.data[tag] = [value]

    def append(self, tag, value) -> list:
        """Add ``value`` to the values of ``tag`` and return the full list.

        :raises ValueError: if ``tag`` is not a known tag
        """
        values = self[find_tag(tag)]
        values.append(value)
        return values

    def __getitem__(self, field_name) -> list:
        """Return the (live) list of values stored for ``field_name``.

        Because ``data`` is a ``defaultdict``, reading a tag that has no
        values yet creates an empty list for it.

        :raises ValueError: if ``field_name`` is not a known tag
        """
        tag = find_tag(field_name)
        return self.data[tag]

    @property
    def type(self):
        """First ``TY`` value, or the (falsy) empty list when none is set."""
        values = self[RTAG_TYPE]
        # Preserve the historical ``list and list[0]`` result: an unset type
        # yields the empty list itself rather than None.
        return values[0] if values else values

    @type.setter
    def type(self, value):
        self[RTAG_TYPE] = value

    @classmethod
    def from_dict(cls, d: dict):
        """Build an entry from a ``{field_name: value_or_list_of_values}`` dict.

        List values are appended item by item; any other value is appended
        as a single item.

        :raises ValueError: if a key with at least one value is not a known tag
        """
        instance = cls()
        for field_name, raw in d.items():
            items = raw if isinstance(raw, list) else [raw]
            # Look the tag up per item (not once up front) so that an empty
            # list neither validates the key nor creates an empty tag.
            for item in items:
                instance[field_name].append(item)
        return instance

    @staticmethod
    def _parse_line(line: str):
        """Split one ``TAG - value`` line into ``(tag, value)``."""
        tag, sep, value = line.partition('-')
        if not sep:
            raise ValueError(f'Invalid RIS line (missing "-" separator): {line!r}')
        # A literal backslash-n in the file encodes a newline inside a value.
        return tag.strip(), value.lstrip().replace('\\n', '\n')

    @classmethod
    def from_text(cls, text: str):
        """Parse the first record found in ``text``.

        Lines are read until the ``ER`` tag; anything after it is ignored.
        Blank lines are skipped.

        :raises ValueError: if a non-blank line has no ``-`` separator or
            carries a tag that is not a known tag
        """
        entry = cls()
        for line in text.strip().splitlines():
            if not line.strip():
                continue
            tag, val = cls._parse_line(line)
            if tag == RTAG_END:
                break
            entry[tag].append(val)
        return entry

    @staticmethod
    def _format_line(tag, value) -> str:
        """Render one ``TAG - value`` line, terminated by a newline."""
        # None must be handled before any string operation on the value.
        if value is None:
            value = ''
        elif not isinstance(value, str):
            value = str(value)
        # Keep each value on one physical line by escaping embedded newlines.
        value = value.replace('\n', '\\n')
        # NOTE(review): the RIS format conventionally puts two spaces between
        # the tag and the hyphen; the separator below is reproduced as it
        # appears in the reviewed source -- confirm against the original file.
        return f'{tag} - {value}\n'

    def to_text(self) -> str:
        """Serialise the entry: ``TY`` first, other tags in insertion order,
        then a closing ``ER`` line.

        ``None`` values are written as empty strings and non-string values
        are converted with ``str``.
        """
        tags = [tag for tag in self.data if tag not in (RTAG_TYPE, RTAG_END)]
        if RTAG_TYPE in self.data:
            tags.insert(0, RTAG_TYPE)

        lines = [
            self._format_line(tag, value)
            for tag in tags
            for value in self.data[tag]
        ]
        lines.append(self._format_line(RTAG_END, ''))
        return ''.join(lines)