Coverage for src / qdrant_loader / connectors / jira / mappers.py: 80%
137 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-06-11 09:38 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-06-11 09:38 +0000
1from __future__ import annotations
3from datetime import datetime
4from typing import Any
6from .config import JiraExtraField, JiraFieldType
7from .models import JiraAttachment, JiraComment, JiraIssue, JiraUser
def parse_user(
    raw_user: dict[str, Any] | None, required: bool = False
) -> JiraUser | None:
    """Build a JiraUser from the raw user mapping of a Jira response.

    The identifier is the first truthy value among ``accountId``, ``name``
    and ``key``. The display name falls back from ``displayName`` to ``name``
    and finally to the identifier itself.

    Args:
        raw_user: Raw user mapping, or ``None``/empty when no user is present.
        required: When true, a missing user or identifier is an error.

    Returns:
        The parsed user, or ``None`` when the data is absent or has no
        identifier and ``required`` is false.

    Raises:
        ValueError: If ``required`` is true and the user or its identifier
            is missing.
    """
    if not raw_user:
        failure = "User data is required but not provided"
    else:
        # Lazily probe the identifier keys in priority order.
        candidates = (raw_user.get(k) for k in ("accountId", "name", "key"))
        identifier = next((value for value in candidates if value), None)
        if identifier:
            shown_name = (
                raw_user.get("displayName") or raw_user.get("name") or identifier
            )
            return JiraUser(
                account_id=identifier,
                display_name=shown_name,
                email_address=raw_user.get("emailAddress"),
            )
        failure = "User data missing required identifier (accountId, name, or key)"

    if required:
        raise ValueError(failure)
    return None
def parse_attachment(raw_attachment: dict[str, Any]) -> JiraAttachment:
    """Build a JiraAttachment from the raw attachment mapping of a Jira response.

    Args:
        raw_attachment: Raw attachment mapping. It must contain the keys
            ``id``, ``filename``, ``size``, ``mimeType``, ``content``,
            ``created`` and ``author``.

    Returns:
        The parsed attachment.

    Raises:
        ValueError: If a required key is absent, the author cannot be
            parsed, or the ``created`` timestamp is not a valid ISO string.
    """
    expected = ("id", "filename", "size", "mimeType", "content", "created", "author")
    absent = [name for name in expected if name not in raw_attachment]
    if absent:
        raise ValueError(
            f"Attachment missing required keys: {', '.join(absent)}. Received: {list(raw_attachment.keys())}"
        )

    # Every expected key is known to be present from here on.
    uploader = parse_user(raw_attachment["author"], required=True)
    if uploader is None:
        raise ValueError("Missing author in Jira attachment")

    stamp = raw_attachment["created"]
    try:
        # A trailing "Z" is rewritten as an explicit UTC offset before parsing.
        normalized = stamp.replace("Z", "+00:00")
        created_at = datetime.fromisoformat(normalized)
    except Exception as exc:
        raise ValueError(f"Invalid created timestamp in attachment: {stamp!r}") from exc

    return JiraAttachment(
        id=raw_attachment["id"],
        filename=raw_attachment["filename"],
        size=raw_attachment["size"],
        mime_type=raw_attachment["mimeType"],
        content_url=raw_attachment["content"],
        created=created_at,
        author=uploader,
    )
def parse_comment(raw_comment: dict[str, Any]) -> JiraComment:
    """Parse a raw comment from the Jira response into a JiraComment object.

    Args:
        raw_comment: Raw comment mapping. ``author``, ``id`` and ``created``
            are looked up directly; ``body`` and ``updated`` are optional.

    Returns:
        The parsed comment. A missing or ``None`` body becomes ``""``; a dict
        body is flattened to text with ``adf_to_oneline_fulltext``. ``updated``
        is ``None`` when the key is absent or its value is ``None``.

    Raises:
        KeyError: If ``author``, ``id`` or ``created`` is absent.
        ValueError: If the author cannot be parsed, the body is neither a
            string nor a dict, or a timestamp is not a valid ISO string.
    """
    author = parse_user(raw_comment["author"], required=True)
    if author is None:
        raise ValueError("Missing author in Jira comment")

    body = raw_comment.get("body")
    if body is None:
        body = ""
    elif isinstance(body, dict):
        body = adf_to_oneline_fulltext(body)
    elif not isinstance(body, str):
        raise ValueError(f"Unexpected body type in Jira comment: {type(body)}")

    def _parse_timestamp(field_name: str) -> datetime:
        """Parse one ISO timestamp field, reporting the offending value on failure."""
        raw_value = raw_comment[field_name]
        try:
            # A trailing "Z" is rewritten as an explicit UTC offset before parsing.
            return datetime.fromisoformat(raw_value.replace("Z", "+00:00"))
        except (AttributeError, TypeError, ValueError) as e:
            raise ValueError(
                f"Invalid '{field_name}' timestamp in Jira comment: {raw_value!r}"
            ) from e

    comment_id = raw_comment["id"]
    created_dt = _parse_timestamp("created")
    # An "updated" key holding None is treated like an absent key instead of
    # crashing on None.replace(...).
    updated_dt = (
        _parse_timestamp("updated")
        if raw_comment.get("updated") is not None
        else None
    )

    return JiraComment(
        id=comment_id,
        body=body,
        created=created_dt,
        updated=updated_dt,
        author=author,
    )
def _extract_extra_field_value(
    container: dict[str, Any],
    param_name: str,
    field_type: JiraFieldType,
    attr_name: str | None,
) -> Any:
    """Read one configured extra field out of the issue fields mapping.

    Args:
        container: Mapping the field is read from.
        param_name: Key of the field inside ``container``.
        field_type: How the raw value is interpreted.
        attr_name: Attribute picked from the value (or from each list item)
            for the object-based field types.

    Returns:
        The raw value for SIMPLE and ARRAY fields. For OBJECT, the named
        attribute, or ``None`` when the value is not a dict. For
        ARRAY_OBJECT, the named attribute of every dict item, or ``[]`` when
        the value is not a list. ``None`` for any other field type.
    """
    value = container.get(param_name)

    if field_type == JiraFieldType.SIMPLE or field_type == JiraFieldType.ARRAY:
        return value

    if field_type == JiraFieldType.OBJECT:
        return value.get(attr_name) if isinstance(value, dict) else None

    if field_type == JiraFieldType.ARRAY_OBJECT:
        if isinstance(value, list):
            # Non-dict entries are skipped rather than reported.
            return [entry.get(attr_name) for entry in value if isinstance(entry, dict)]
        return []

    return None
def parse_issue(
    raw_issue: dict[str, Any], extra_fields: list[JiraExtraField] | None = None
) -> JiraIssue:
    """Parse a raw issue from the Jira response into a JiraIssue object.

    Args:
        raw_issue: Raw issue mapping with top-level ``id``, ``key`` and a
            ``fields`` dict.
        extra_fields: Optional extra field definitions. Each one is read from
            ``fields`` and set as an attribute on the resulting issue.

    Returns:
        The parsed issue. Optional collections (labels, attachments,
        comments, subtasks, issue links) that are absent, ``None`` or not a
        list are treated as empty.

    Raises:
        ValueError: If ``fields`` is not a dict; if ``summary``, ``created``,
            ``updated`` or ``reporter`` is missing; if ``issuetype.name``,
            ``status.name`` or ``project.key`` is missing; if a timestamp is
            invalid; if ``id`` or ``key`` is missing; or if the description
            is neither a string, a dict nor ``None``.
    """
    # Gather identifiers early for clearer error messages
    issue_id = raw_issue.get("id")
    issue_key = raw_issue.get("key")
    issue_identifier = issue_key or issue_id or "<unknown>"

    def _list_or_empty(value: Any) -> list[Any]:
        """Return ``value`` when it is a list, otherwise an empty list."""
        return value if isinstance(value, list) else []

    # Validate presence of fields
    fields = raw_issue.get("fields")
    if not isinstance(fields, dict):
        raise ValueError(
            f"Jira issue {issue_identifier} missing required 'fields' object"
        )

    # Validate required top-level keys within fields (absent and None both count)
    required_field_keys = ["summary", "created", "updated", "reporter"]
    missing_simple = [k for k in required_field_keys if fields.get(k) is None]
    if missing_simple:
        raise ValueError(
            f"Jira issue {issue_identifier} missing required field(s): {', '.join(missing_simple)}"
        )

    # Validate nested required keys
    def _require_dict_with_key(
        container: dict[str, Any], outer_key: str, inner_key: str
    ) -> None:
        """Raise unless ``container[outer_key]`` is a dict with a non-None ``inner_key``."""
        value = container.get(outer_key)
        if not isinstance(value, dict) or value.get(inner_key) is None:
            raise ValueError(
                f"Jira issue {issue_identifier} missing required '{outer_key}.{inner_key}'"
            )

    _require_dict_with_key(fields, "issuetype", "name")
    _require_dict_with_key(fields, "status", "name")
    _require_dict_with_key(fields, "project", "key")

    # Parse reporter (required)
    reporter = parse_user(fields.get("reporter"), required=True)
    if reporter is None:
        raise ValueError(
            f"Missing reporter for Jira issue {issue_identifier}: {fields.get('reporter')!r}"
        )

    # Parent key (optional)
    parent = fields.get("parent")
    parent_key = parent.get("key") if isinstance(parent, dict) else None

    # Timestamps with clear error messages
    def _parse_required_timestamp(field_name: str) -> datetime:
        """Parse a required ISO timestamp field from ``fields``."""
        raw_value = fields.get(field_name)
        if not isinstance(raw_value, str):
            raise ValueError(
                f"Jira issue {issue_identifier} missing valid '{field_name}' timestamp"
            )
        try:
            # A trailing "Z" is rewritten as an explicit UTC offset before parsing.
            return datetime.fromisoformat(raw_value.replace("Z", "+00:00"))
        except ValueError as e:
            raise ValueError(
                f"Invalid '{field_name}' timestamp for Jira issue {issue_identifier}: {raw_value!r}"
            ) from e

    created_dt = _parse_required_timestamp("created")
    updated_dt = _parse_required_timestamp("updated")

    # Safely extract attachments: support both 'attachment' and 'attachments'
    raw_attachments = fields.get("attachment")
    if raw_attachments is None:
        raw_attachments = fields.get("attachments")
    attachments_list = _list_or_empty(raw_attachments)

    # Safely extract comments from fields.comment.comments
    comment_field = fields.get("comment")
    comments_list = (
        _list_or_empty(comment_field.get("comments"))
        if isinstance(comment_field, dict)
        else []
    )

    # Safely extract subtasks keys. The list guard matters because a key that
    # is present with a None value would otherwise be iterated and raise.
    subtasks_keys = [
        st["key"]
        for st in _list_or_empty(fields.get("subtasks"))
        if isinstance(st, dict) and st.get("key")
    ]

    # Safely extract linked issues (outward only), with the same list guard
    outward_issues = [
        link.get("outwardIssue")
        for link in _list_or_empty(fields.get("issuelinks"))
        if isinstance(link, dict)
    ]
    linked_outward = [
        outward["key"]
        for outward in outward_issues
        if isinstance(outward, dict) and outward.get("key")
    ]

    # Optional fields
    priority = fields.get("priority")
    priority_name = priority.get("name") if isinstance(priority, dict) else None

    # Validate id/key presence for the model
    if not issue_id or not issue_key:
        raise ValueError(
            f"Jira issue missing required top-level identifier(s): id={issue_id!r}, key={issue_key!r}"
        )

    # A missing description becomes "", an explicit None stays None, and a
    # dict is flattened to plain text.
    description = fields.get("description", "")
    if description is not None and not isinstance(description, str):
        if not isinstance(description, dict):
            raise ValueError(
                f"Unexpected description type for Jira issue {issue_identifier}: {type(description)}"
            )
        description = adf_to_oneline_fulltext(description)

    jira_issue = JiraIssue(
        id=issue_id,
        key=issue_key,
        summary=str(fields.get("summary")),
        description=description,
        # These three nested values were validated above.
        issue_type=fields["issuetype"]["name"],
        status=fields["status"]["name"],
        priority=priority_name,
        project_key=fields["project"]["key"],
        created=created_dt,
        updated=updated_dt,
        reporter=reporter,
        assignee=parse_user(fields.get("assignee")),
        labels=_list_or_empty(fields.get("labels")),
        attachments=[
            parse_attachment(att) for att in attachments_list if isinstance(att, dict)
        ],
        comments=[
            parse_comment(comment)
            for comment in comments_list
            if isinstance(comment, dict)
        ],
        parent_key=parent_key,
        subtasks=subtasks_keys,
        linked_issues=linked_outward,
    )

    # Attach configured extra fields as attributes on the issue
    for field in extra_fields or []:
        value = _extract_extra_field_value(
            fields,
            field.param_name,
            field.field_type,
            field.attr_name,
        )
        setattr(jira_issue, field.name, value)
    return jira_issue
def adf_to_oneline_fulltext(node: dict) -> str:
    """Flatten a Jira ADF structure into one line of plain text.

    Every string stored under a ``"text"`` key is collected depth-first. A
    dict's own text comes before anything found in its other values. The
    fragments are then joined with single spaces, with all runs of whitespace
    collapsed.

    Args:
        node: Root of the ADF structure (nested dicts and lists).

    Returns:
        The collected text on a single line, or ``""`` when there is none.
    """

    def walk(obj: Any):
        """Yield text fragments from ``obj`` in document order."""
        if isinstance(obj, dict):
            own_text = obj.get("text")
            if isinstance(own_text, str):
                yield own_text
            # A "text" entry is never descended into, even when it is not a string.
            for name, child in obj.items():
                if name != "text":
                    yield from walk(child)
        elif isinstance(obj, list):
            for child in obj:
                yield from walk(child)

    # Splitting each fragment on whitespace and re-joining collapses newlines,
    # tabs and repeated spaces into single spaces.
    return " ".join(word for fragment in walk(node) for word in fragment.split())