Coverage for src / qdrant_loader / connectors / jira / mappers.py: 77%

118 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-10 09:40 +0000

1from __future__ import annotations 

2 

3from datetime import datetime 

4from typing import Any 

5 

6from .models import JiraAttachment, JiraComment, JiraIssue, JiraUser 

7 

8 

def parse_user(
    raw_user: dict[str, Any] | None, required: bool = False
) -> JiraUser | None:
    """Parse a raw user from the Jira response into a JiraUser object.

    Args:
        raw_user: User mapping taken from the Jira payload, or None.
        required: When True, absent or unidentifiable user data raises
            instead of yielding None.

    Returns:
        A JiraUser, or None when the user data is absent or carries no
        identifier and ``required`` is False.

    Raises:
        ValueError: If ``required`` is True and the user data is missing,
            is not a mapping, or has none of ``accountId``, ``name``, ``key``.
    """
    # A non-mapping payload (e.g. a bare string) has no fields to read, so it
    # is handled like missing data instead of failing on ``.get`` below.
    if not isinstance(raw_user, dict) or not raw_user:
        if required:
            raise ValueError("User data is required but not provided")
        return None

    # Identifier lookup order: accountId, then name, then key.
    account_id = (
        raw_user.get("accountId") or raw_user.get("name") or raw_user.get("key")
    )
    if not account_id:
        if required:
            raise ValueError(
                "User data missing required identifier (accountId, name, or key)"
            )
        return None

    return JiraUser(
        account_id=account_id,
        # Fall back to the login name, then to the identifier itself, so the
        # display name is never empty.
        display_name=(
            raw_user.get("displayName") or raw_user.get("name") or account_id
        ),
        email_address=raw_user.get("emailAddress"),
    )

33 

34 

def parse_attachment(raw_attachment: dict[str, Any]) -> JiraAttachment:
    """Parse a raw attachment from the Jira response into a JiraAttachment object.

    Args:
        raw_attachment: Attachment mapping from the Jira payload. It must
            contain the keys ``id``, ``filename``, ``size``, ``mimeType``,
            ``content``, ``created`` and ``author``.

    Returns:
        The populated JiraAttachment.

    Raises:
        ValueError: If a required key is absent, the author cannot be
            parsed, or ``created`` is not a valid ISO-8601 timestamp string.
    """
    required_keys = (
        "id",
        "filename",
        "size",
        "mimeType",
        "content",
        "created",
        "author",
    )
    missing_keys = [key for key in required_keys if key not in raw_attachment]
    if missing_keys:
        raise ValueError(
            f"Attachment missing required keys: {', '.join(missing_keys)}. Received: {list(raw_attachment.keys())}"
        )

    author = parse_user(raw_attachment["author"], required=True)
    # parse_user is called with required=True; this check additionally
    # narrows the Optional return type for the constructor call below.
    if author is None:
        raise ValueError("Missing author in Jira attachment")

    created_raw = raw_attachment["created"]
    try:
        # Rewrite a trailing "Z" as an explicit UTC offset before parsing.
        created_dt = datetime.fromisoformat(created_raw.replace("Z", "+00:00"))
    except (AttributeError, TypeError, ValueError) as e:
        # AttributeError/TypeError: value is not a string;
        # ValueError: string is not a valid ISO-8601 timestamp.
        raise ValueError(
            f"Invalid created timestamp in attachment: {created_raw!r}"
        ) from e

    return JiraAttachment(
        id=raw_attachment["id"],
        filename=raw_attachment["filename"],
        size=raw_attachment["size"],
        mime_type=raw_attachment["mimeType"],
        content_url=raw_attachment["content"],
        created=created_dt,
        author=author,
    )

74 

75 

76def parse_comment(raw_comment: dict[str, Any]) -> JiraComment: 

77 """Parse a raw comment from the Jira response into a JiraComment object.""" 

78 author = parse_user(raw_comment["author"], required=True) 

79 if author is None: 

80 raise ValueError("Missing author in Jira comment") 

81 

82 body = raw_comment.get("body") 

83 if body is None: 

84 body = "" 

85 elif not isinstance(body, str): 

86 if not isinstance(body, dict): 

87 raise ValueError(f"Unexpected body type in Jira comment: {type(body)}") 

88 body = adf_to_oneline_fulltext(body) 

89 

90 return JiraComment( 

91 id=raw_comment["id"], 

92 body=body, 

93 created=datetime.fromisoformat(raw_comment["created"].replace("Z", "+00:00")), 

94 updated=( 

95 datetime.fromisoformat(raw_comment["updated"].replace("Z", "+00:00")) 

96 if "updated" in raw_comment 

97 else None 

98 ), 

99 author=author, 

100 ) 

101 

102 

def parse_issue(raw_issue: dict[str, Any]) -> JiraIssue:
    """Parse a raw issue from the Jira response into a JiraIssue object.

    Args:
        raw_issue: Issue mapping from the Jira payload, expected to carry
            top-level ``id`` and ``key`` plus a ``fields`` mapping.

    Returns:
        The populated JiraIssue. Optional collections (labels, attachments,
        comments, subtasks, linked issues) default to empty lists when the
        payload omits them or supplies a non-list value.

    Raises:
        ValueError: If ``fields`` is not a mapping; if ``summary``,
            ``created``, ``updated`` or ``reporter`` is missing or null; if
            ``issuetype.name``, ``status.name`` or ``project.key`` is missing;
            if the reporter cannot be parsed; if a timestamp is not a valid
            ISO-8601 string; if ``id`` or ``key`` is missing; or if the
            description is neither a string, a dict, nor None. Errors raised
            while parsing nested attachments and comments propagate unchanged.
    """
    # Gather identifiers early for clearer error messages
    issue_id = raw_issue.get("id")
    issue_key = raw_issue.get("key")
    issue_identifier = issue_key or issue_id or "<unknown>"

    # Validate presence of fields
    fields = raw_issue.get("fields")
    if not isinstance(fields, dict):
        raise ValueError(
            f"Jira issue {issue_identifier} missing required 'fields' object"
        )

    # Validate required top-level keys within fields (absent and null are
    # both treated as missing)
    required_field_keys = ["summary", "created", "updated", "reporter"]
    missing_simple = [k for k in required_field_keys if fields.get(k) is None]
    if missing_simple:
        raise ValueError(
            f"Jira issue {issue_identifier} missing required field(s): {', '.join(missing_simple)}"
        )

    def _require_dict_with_key(
        container: dict[str, Any], outer_key: str, inner_key: str
    ) -> None:
        """Raise unless container[outer_key] is a dict with a non-null inner_key."""
        value = container.get(outer_key)
        if not isinstance(value, dict) or value.get(inner_key) is None:
            raise ValueError(
                f"Jira issue {issue_identifier} missing required '{outer_key}.{inner_key}'"
            )

    def _parse_timestamp(label: str, raw: Any) -> datetime:
        """Parse an ISO-8601 string from fields[label], raising ValueError otherwise."""
        if not isinstance(raw, str):
            raise ValueError(
                f"Jira issue {issue_identifier} missing valid '{label}' timestamp"
            )
        try:
            # Rewrite a trailing "Z" as an explicit UTC offset before parsing.
            return datetime.fromisoformat(raw.replace("Z", "+00:00"))
        except ValueError as e:
            raise ValueError(
                f"Invalid '{label}' timestamp for Jira issue {issue_identifier}: {raw!r}"
            ) from e

    # Validate nested required keys
    _require_dict_with_key(fields, "issuetype", "name")
    _require_dict_with_key(fields, "status", "name")
    _require_dict_with_key(fields, "project", "key")

    # Parse reporter (required)
    reporter = parse_user(fields.get("reporter"), required=True)
    if reporter is None:
        raise ValueError(
            f"Missing reporter for Jira issue {issue_identifier}: {fields.get('reporter')!r}"
        )

    # Parent key (optional)
    parent = fields.get("parent")
    parent_key = parent.get("key") if isinstance(parent, dict) else None

    # Timestamps with clear error messages ('created' is checked first)
    created_dt = _parse_timestamp("created", fields.get("created"))
    updated_dt = _parse_timestamp("updated", fields.get("updated"))

    # Safely extract attachments: support both 'attachment' and 'attachments'
    raw_attachments = fields.get("attachment")
    if raw_attachments is None:
        raw_attachments = fields.get("attachments")
    attachments_list = raw_attachments if isinstance(raw_attachments, list) else []

    # Safely extract comments from fields.comment.comments
    comment_field = fields.get("comment")
    raw_comments = (
        comment_field.get("comments", []) if isinstance(comment_field, dict) else []
    )
    comments_list = raw_comments if isinstance(raw_comments, list) else []

    # Safely extract subtask keys; a null or non-sequence value counts as none
    raw_subtasks = fields.get("subtasks")
    if not isinstance(raw_subtasks, (list, tuple)):
        raw_subtasks = []
    subtasks_keys = [
        st["key"] for st in raw_subtasks if isinstance(st, dict) and st.get("key")
    ]

    # Safely extract linked issues (outward only); a null or non-sequence
    # value counts as none
    raw_links = fields.get("issuelinks")
    if not isinstance(raw_links, (list, tuple)):
        raw_links = []
    linked_outward = [
        link["outwardIssue"]["key"]
        for link in raw_links
        if isinstance(link, dict)
        and isinstance(link.get("outwardIssue"), dict)
        and link["outwardIssue"].get("key")
    ]

    # Optional fields
    priority = fields.get("priority")
    priority_name = priority.get("name") if isinstance(priority, dict) else None

    # Validate id/key presence for the model
    if not issue_id or not issue_key:
        raise ValueError(
            f"Jira issue missing required top-level identifier(s): id={issue_id!r}, key={issue_key!r}"
        )

    # Description may be a plain string, None, or a dict that is flattened
    # to one line of text; a missing key defaults to "".
    description = fields.get("description", "")
    if description is not None and not isinstance(description, str):
        if not isinstance(description, dict):
            raise ValueError(
                f"Unexpected description type for Jira issue {issue_identifier}: {type(description)}"
            )
        description = adf_to_oneline_fulltext(description)

    raw_labels = fields.get("labels")
    labels = raw_labels if isinstance(raw_labels, list) else []

    return JiraIssue(
        id=issue_id,
        key=issue_key,
        summary=str(fields.get("summary")),
        description=description,
        # The nested names/keys below were validated as present above.
        issue_type=fields["issuetype"]["name"],
        status=fields["status"]["name"],
        priority=priority_name,
        project_key=fields["project"]["key"],
        created=created_dt,
        updated=updated_dt,
        reporter=reporter,
        assignee=parse_user(fields.get("assignee")),
        labels=labels,
        attachments=[
            parse_attachment(att) for att in attachments_list if isinstance(att, dict)
        ],
        comments=[
            parse_comment(comment)
            for comment in comments_list
            if isinstance(comment, dict)
        ],
        parent_key=parent_key,
        subtasks=subtasks_keys,
        linked_issues=linked_outward,
    )

267 

268 

def adf_to_oneline_fulltext(node: dict) -> str:
    """Convert Jira ADF structure to a single-line plain text string.

    Walks ``node`` depth-first. For every dict encountered, a string stored
    under the ``"text"`` key is collected first, then all other values are
    visited in key order; lists are visited element by element. Any other
    value contributes nothing.

    Args:
        node: Root of the nested dict/list structure to flatten.

    Returns:
        The collected text fragments joined by single spaces, with every run
        of whitespace (including newlines) collapsed to one space and leading
        and trailing whitespace removed. Returns ``""`` when no text is found.
    """
    fragments: list[str] = []

    def collect(obj: Any) -> None:
        """Append every text fragment found under obj, in document order."""
        if isinstance(obj, dict):
            text = obj.get("text")
            if isinstance(text, str):
                fragments.append(text)
            # The "text" value itself is never descended into, even when it
            # is not a string.
            for key, value in obj.items():
                if key != "text":
                    collect(value)
        elif isinstance(obj, list):
            for item in obj:
                collect(item)

    collect(node)

    # Join, then split/re-join to squeeze all whitespace down to single spaces.
    return " ".join(" ".join(fragments).split())