Coverage for src/qdrant_loader/connectors/jira/mappers.py: 80%

137 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-06-11 09:38 +0000

1from __future__ import annotations 

2 

3from datetime import datetime 

4from typing import Any 

5 

6from .config import JiraExtraField, JiraFieldType 

7from .models import JiraAttachment, JiraComment, JiraIssue, JiraUser 

8 

9 

def parse_user(
    raw_user: dict[str, Any] | None, required: bool = False
) -> JiraUser | None:
    """Parse a raw user from the Jira response into a JiraUser object.

    Args:
        raw_user: User payload from the Jira response, or ``None``/empty when
            the field is unset.
        required: When true, a missing payload or a payload without any
            usable identifier raises instead of returning ``None``.

    Returns:
        The parsed ``JiraUser``, or ``None`` when the user is absent (or has
        no identifier) and ``required`` is false.

    Raises:
        ValueError: If the payload is a non-empty value that is not a dict,
            or if ``required`` is true and the payload is missing or carries
            none of ``accountId``, ``name`` or ``key``.
    """
    if not raw_user:
        if required:
            raise ValueError("User data is required but not provided")
        return None

    # A truthy non-dict (e.g. a bare string) would otherwise fail on `.get`
    # with an opaque AttributeError; report it as a data error instead.
    if not isinstance(raw_user, dict):
        raise ValueError(f"Unexpected user data type: {type(raw_user)}")

    # Identifier fallback order: accountId, then name, then key.
    # NOTE(review): presumably accountId is the Cloud identifier and
    # name/key the Server/Data Center ones - confirm against the connector.
    account_id = (
        raw_user.get("accountId") or raw_user.get("name") or raw_user.get("key")
    )
    if not account_id:
        if required:
            raise ValueError(
                "User data missing required identifier (accountId, name, or key)"
            )
        return None

    # Fall back to the identifier so display_name is never empty.
    display_name = raw_user.get("displayName") or raw_user.get("name") or account_id
    return JiraUser(
        account_id=account_id,
        display_name=display_name,
        email_address=raw_user.get("emailAddress"),
    )

34 

35 

def parse_attachment(raw_attachment: dict[str, Any]) -> JiraAttachment:
    """Parse a raw attachment from the Jira response into a JiraAttachment object.

    Args:
        raw_attachment: Attachment payload; must contain the keys ``id``,
            ``filename``, ``size``, ``mimeType``, ``content``, ``created``
            and ``author``.

    Returns:
        The parsed ``JiraAttachment``.

    Raises:
        ValueError: If a required key is missing, the author cannot be
            parsed, or ``created`` is not a valid ISO-8601 timestamp string.
    """
    required_keys = (
        "id",
        "filename",
        "size",
        "mimeType",
        "content",
        "created",
        "author",
    )

    missing_keys = [key for key in required_keys if key not in raw_attachment]
    if missing_keys:
        raise ValueError(
            f"Attachment missing required keys: {', '.join(missing_keys)}. Received: {list(raw_attachment.keys())}"
        )

    author = parse_user(raw_attachment["author"], required=True)
    # parse_user raises when required=True; this check keeps the declared
    # `JiraUser | None` return type narrowed for type checkers.
    if author is None:
        raise ValueError("Missing author in Jira attachment")

    created_raw = raw_attachment["created"]
    # Validate the type up front instead of relying on a blanket
    # `except Exception` to translate the resulting AttributeError.
    if not isinstance(created_raw, str):
        raise ValueError(f"Invalid created timestamp in attachment: {created_raw!r}")
    try:
        # Rewrite a trailing "Z" as an explicit UTC offset so that
        # fromisoformat accepts it on Python versions before 3.11.
        created_dt = datetime.fromisoformat(created_raw.replace("Z", "+00:00"))
    except ValueError as e:
        raise ValueError(
            f"Invalid created timestamp in attachment: {created_raw!r}"
        ) from e

    # All keys below were verified present above, so index directly.
    return JiraAttachment(
        id=raw_attachment["id"],
        filename=raw_attachment["filename"],
        size=raw_attachment["size"],
        mime_type=raw_attachment["mimeType"],
        content_url=raw_attachment["content"],
        created=created_dt,
        author=author,
    )

75 

76 

def _parse_comment_timestamp(raw_value: Any, label: str) -> datetime:
    """Parse one comment timestamp, raising ValueError that names the field."""
    if not isinstance(raw_value, str):
        raise ValueError(
            f"Invalid '{label}' timestamp in Jira comment: {raw_value!r}"
        )
    try:
        # Rewrite a trailing "Z" as an explicit UTC offset so that
        # fromisoformat accepts it on Python versions before 3.11.
        return datetime.fromisoformat(raw_value.replace("Z", "+00:00"))
    except ValueError as e:
        raise ValueError(
            f"Invalid '{label}' timestamp in Jira comment: {raw_value!r}"
        ) from e


def parse_comment(raw_comment: dict[str, Any]) -> JiraComment:
    """Parse a raw comment from the Jira response into a JiraComment object.

    Args:
        raw_comment: Comment payload. ``author``, ``id`` and ``created`` are
            required; ``body`` and ``updated`` are optional.

    Returns:
        The parsed ``JiraComment``. A missing body becomes ``""``; a dict
        body is flattened with ``adf_to_oneline_fulltext``. ``updated`` is
        ``None`` when the key is absent or its value is ``None``.

    Raises:
        KeyError: If ``author``, ``id`` or ``created`` is missing.
        ValueError: If the author cannot be parsed, the body is neither a
            string nor a dict, or a timestamp is not a valid ISO-8601 string.
    """
    raw_author = raw_comment["author"]
    author = parse_user(raw_author, required=True)
    # parse_user raises when required=True; this check keeps the declared
    # `JiraUser | None` return type narrowed for type checkers.
    if author is None:
        raise ValueError("Missing author in Jira comment")

    body = raw_comment.get("body")
    if body is None:
        body = ""
    elif isinstance(body, dict):
        # A dict body is treated as a structured document and flattened.
        body = adf_to_oneline_fulltext(body)
    elif not isinstance(body, str):
        raise ValueError(f"Unexpected body type in Jira comment: {type(body)}")

    comment_id = raw_comment["id"]
    created = _parse_comment_timestamp(raw_comment["created"], "created")

    # Treat an explicit null the same as a missing key; testing only for key
    # presence would crash on `None.replace(...)`.
    updated_raw = raw_comment.get("updated")
    updated = (
        None
        if updated_raw is None
        else _parse_comment_timestamp(updated_raw, "updated")
    )

    return JiraComment(
        id=comment_id,
        body=body,
        created=created,
        updated=updated,
        author=author,
    )

102 

103 

def _extract_extra_field_value(
    container: dict[str, Any],
    param_name: str,
    field_type: JiraFieldType,
    attr_name: str | None,
) -> Any:
    """Pull one configured extra field out of the issue ``fields`` mapping.

    The raw value is looked up under ``param_name`` and shaped by
    ``field_type``:

    * ``SIMPLE`` / ``ARRAY``: the raw value is returned untouched.
    * ``OBJECT``: ``attr_name`` is read from the raw dict; ``None`` when the
      raw value is not a dict.
    * ``ARRAY_OBJECT``: ``attr_name`` is read from every dict element of the
      raw list; ``[]`` when the raw value is not a list.

    Any other field type yields ``None``.
    """
    value = container.get(param_name)

    if field_type == JiraFieldType.OBJECT:
        return value.get(attr_name) if isinstance(value, dict) else None

    if field_type == JiraFieldType.ARRAY_OBJECT:
        if isinstance(value, list):
            # Non-dict elements are skipped rather than reported.
            return [
                entry.get(attr_name) for entry in value if isinstance(entry, dict)
            ]
        return []

    if field_type == JiraFieldType.SIMPLE or field_type == JiraFieldType.ARRAY:
        return value

    return None

127 

128 

def _parse_issue_timestamp(
    raw_value: Any, label: str, issue_identifier: Any
) -> datetime:
    """Parse a required issue timestamp, raising ValueError with issue context."""
    if not isinstance(raw_value, str):
        raise ValueError(
            f"Jira issue {issue_identifier} missing valid '{label}' timestamp"
        )
    try:
        # Rewrite a trailing "Z" as an explicit UTC offset so that
        # fromisoformat accepts it on Python versions before 3.11.
        return datetime.fromisoformat(raw_value.replace("Z", "+00:00"))
    except ValueError as e:
        raise ValueError(
            f"Invalid '{label}' timestamp for Jira issue {issue_identifier}: {raw_value!r}"
        ) from e


def parse_issue(
    raw_issue: dict[str, Any], extra_fields: list[JiraExtraField] | None = None
) -> JiraIssue:
    """Parse a raw issue from the Jira response into a JiraIssue object.

    Args:
        raw_issue: Issue payload with top-level ``id``, ``key`` and a
            ``fields`` dict. Within ``fields``, ``summary``, ``created``,
            ``updated``, ``reporter``, ``issuetype.name``, ``status.name``
            and ``project.key`` are required; everything else is optional.
        extra_fields: Optional extra field definitions. For each one the
            value found under ``param_name`` is extracted according to
            ``field_type``/``attr_name`` and set on the resulting issue as
            attribute ``name``.

    Returns:
        The parsed ``JiraIssue``.

    Raises:
        ValueError: If a required field or identifier is missing or
            malformed, or if a nested user, attachment or comment cannot be
            parsed.
    """
    # Gather identifiers early for clearer error messages.
    issue_id = raw_issue.get("id")
    issue_key = raw_issue.get("key")
    issue_identifier = issue_key or issue_id or "<unknown>"

    fields = raw_issue.get("fields")
    if not isinstance(fields, dict):
        raise ValueError(
            f"Jira issue {issue_identifier} missing required 'fields' object"
        )

    # A key that is absent and a key that is null are both "missing".
    required_field_keys = ("summary", "created", "updated", "reporter")
    missing_simple = [k for k in required_field_keys if fields.get(k) is None]
    if missing_simple:
        raise ValueError(
            f"Jira issue {issue_identifier} missing required field(s): {', '.join(missing_simple)}"
        )

    def _require_dict_with_key(
        container: dict[str, Any], outer_key: str, inner_key: str
    ) -> None:
        """Ensure ``container[outer_key]`` is a dict with a non-null ``inner_key``."""
        value = container.get(outer_key)
        if not isinstance(value, dict) or value.get(inner_key) is None:
            raise ValueError(
                f"Jira issue {issue_identifier} missing required '{outer_key}.{inner_key}'"
            )

    _require_dict_with_key(fields, "issuetype", "name")
    _require_dict_with_key(fields, "status", "name")
    _require_dict_with_key(fields, "project", "key")

    reporter = parse_user(fields.get("reporter"), required=True)
    # parse_user raises when required=True; this check keeps the declared
    # `JiraUser | None` return type narrowed for type checkers.
    if reporter is None:
        raise ValueError(
            f"Missing reporter for Jira issue {issue_identifier}: {fields.get('reporter')!r}"
        )

    # Parent key (optional).
    parent = fields.get("parent")
    parent_key = parent.get("key") if isinstance(parent, dict) else None

    created_dt = _parse_issue_timestamp(
        fields.get("created"), "created", issue_identifier
    )
    updated_dt = _parse_issue_timestamp(
        fields.get("updated"), "updated", issue_identifier
    )

    # Attachments: support both 'attachment' and 'attachments'.
    raw_attachments = fields.get("attachment")
    if raw_attachments is None:
        raw_attachments = fields.get("attachments")
    attachments_list = raw_attachments if isinstance(raw_attachments, list) else []

    # Comments live under fields.comment.comments.
    comment_field = fields.get("comment")
    raw_comments = (
        comment_field.get("comments") if isinstance(comment_field, dict) else None
    )
    comments_list = raw_comments if isinstance(raw_comments, list) else []

    # `.get(key, [])` only covers a missing key; an explicit null would make
    # the iteration below raise TypeError, so normalise non-lists to [].
    raw_subtasks = fields.get("subtasks")
    if not isinstance(raw_subtasks, list):
        raw_subtasks = []
    subtasks_keys = [
        st.get("key") for st in raw_subtasks if isinstance(st, dict) and st.get("key")
    ]

    # Linked issues: outward links only; same null-safety as subtasks.
    raw_links = fields.get("issuelinks")
    if not isinstance(raw_links, list):
        raw_links = []
    linked_outward = []
    for link in raw_links:
        outward = link.get("outwardIssue") if isinstance(link, dict) else None
        if isinstance(outward, dict) and outward.get("key"):
            linked_outward.append(outward["key"])

    # Priority (optional).
    priority = fields.get("priority")
    priority_name = priority.get("name") if isinstance(priority, dict) else None

    # Validate id/key presence for the model.
    if not issue_id or not issue_key:
        raise ValueError(
            f"Jira issue missing required top-level identifier(s): id={issue_id!r}, key={issue_key!r}"
        )

    # Description: missing key -> "", explicit null stays None, a dict is
    # flattened to plain text, anything else is rejected.
    description = fields.get("description", "")
    if description is not None and not isinstance(description, str):
        if not isinstance(description, dict):
            raise ValueError(
                f"Unexpected description type for Jira issue {issue_identifier}: {type(description)}"
            )
        description = adf_to_oneline_fulltext(description)

    labels = fields.get("labels")
    if not isinstance(labels, list):
        labels = []

    jira_issue = JiraIssue(
        id=issue_id,
        key=issue_key,
        summary=str(fields.get("summary")),
        description=description,
        # The three nested lookups below were validated above.
        issue_type=fields["issuetype"]["name"],
        status=fields["status"]["name"],
        priority=priority_name,
        project_key=fields["project"]["key"],
        created=created_dt,
        updated=updated_dt,
        reporter=reporter,
        assignee=parse_user(fields.get("assignee")),
        labels=labels,
        attachments=[
            parse_attachment(att) for att in attachments_list if isinstance(att, dict)
        ],
        comments=[
            parse_comment(comment)
            for comment in comments_list
            if isinstance(comment, dict)
        ],
        parent_key=parent_key,
        subtasks=subtasks_keys,
        linked_issues=linked_outward,
    )

    if extra_fields:
        for field in extra_fields:
            value = _extract_extra_field_value(
                fields,
                field.param_name,
                field.field_type,
                field.attr_name,
            )
            setattr(jira_issue, field.name, value)
    return jira_issue

304 

305 

def adf_to_oneline_fulltext(node: dict) -> str:
    """Convert Jira ADF structure to a single-line plain text string.

    The structure is walked depth-first. Every string stored under a
    ``"text"`` key is collected; strings under any other key are ignored.
    The pieces are joined with spaces and every run of whitespace (including
    newlines) is collapsed to a single space.

    Args:
        node: Root of the nested dict/list structure to flatten.

    Returns:
        The collected text on one line, or ``""`` when no text is found.
    """

    def collect(obj: Any, out: list[str]) -> None:
        """Append every ``"text"`` string reachable from ``obj`` to ``out``."""
        if isinstance(obj, dict):
            own_text = obj.get("text")
            # A node's own text is emitted before anything nested in it.
            if isinstance(own_text, str):
                out.append(own_text)

            for key, value in obj.items():
                # Skip only the string already emitted above; a non-string
                # "text" value is still descended into so that nested
                # content is not silently dropped.
                if key == "text" and isinstance(value, str):
                    continue
                collect(value, out)

        elif isinstance(obj, list):
            for item in obj:
                collect(item, out)

    pieces: list[str] = []
    collect(node, pieces)

    # split() with no argument collapses all whitespace, newlines included.
    return " ".join(" ".join(pieces).split())