Coverage for src / qdrant_loader / connectors / jira / mappers.py: 77%
118 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-10 09:40 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-10 09:40 +0000
1from __future__ import annotations
3from datetime import datetime
4from typing import Any
6from .models import JiraAttachment, JiraComment, JiraIssue, JiraUser
def parse_user(
    raw_user: dict[str, Any] | None, required: bool = False
) -> JiraUser | None:
    """Build a JiraUser from a raw Jira user payload.

    The account identifier is the first truthy value among the
    ``accountId``, ``name`` and ``key`` entries, tried in that order. The
    display name falls back from ``displayName`` to ``name`` to the
    identifier itself.

    Args:
        raw_user: Raw user mapping from the Jira response, or None.
        required: When True, a missing payload or identifier is an error
            instead of yielding None.

    Returns:
        The parsed JiraUser, or None when the payload or its identifier is
        absent and ``required`` is False.

    Raises:
        ValueError: If ``required`` is True and the payload is empty or
            carries none of the identifier entries.
    """
    if not raw_user:
        if not required:
            return None
        raise ValueError("User data is required but not provided")

    # Lazy generator: later keys are only looked up if earlier ones are falsy.
    candidates = (raw_user.get(name) for name in ("accountId", "name", "key"))
    identifier = next((value for value in candidates if value), None)
    if not identifier:
        if not required:
            return None
        raise ValueError(
            "User data missing required identifier (accountId, name, or key)"
        )

    shown_name = raw_user.get("displayName") or raw_user.get("name") or identifier
    return JiraUser(
        account_id=identifier,
        display_name=shown_name,
        email_address=raw_user.get("emailAddress"),
    )
def parse_attachment(raw_attachment: dict[str, Any]) -> JiraAttachment:
    """Build a JiraAttachment from a raw Jira attachment payload.

    Args:
        raw_attachment: Raw attachment mapping from the Jira response. It
            must contain the keys ``id``, ``filename``, ``size``,
            ``mimeType``, ``content``, ``created`` and ``author``.

    Returns:
        The parsed JiraAttachment.

    Raises:
        ValueError: If any required key is absent, the author cannot be
            parsed, or the ``created`` value is not a parseable ISO
            timestamp.
    """
    expected = ("id", "filename", "size", "mimeType", "content", "created", "author")
    absent = [name for name in expected if name not in raw_attachment]
    if absent:
        raise ValueError(
            f"Attachment missing required keys: {', '.join(absent)}. Received: {list(raw_attachment.keys())}"
        )

    uploader = parse_user(raw_attachment.get("author"), required=True)
    if uploader is None:
        raise ValueError("Missing author in Jira attachment")

    created_raw = raw_attachment.get("created")
    try:
        # fromisoformat is fed an explicit UTC offset in place of a "Z" suffix.
        normalized = created_raw.replace("Z", "+00:00")
        created_at = datetime.fromisoformat(normalized)
    except Exception as e:
        raise ValueError(
            f"Invalid created timestamp in attachment: {created_raw!r}"
        ) from e

    return JiraAttachment(
        id=raw_attachment.get("id"),
        filename=raw_attachment.get("filename"),
        size=raw_attachment.get("size"),
        mime_type=raw_attachment.get("mimeType"),
        content_url=raw_attachment.get("content"),
        created=created_at,
        author=uploader,
    )
def parse_comment(raw_comment: dict[str, Any]) -> JiraComment:
    """Build a JiraComment from a raw Jira comment payload.

    The body may be absent (treated as an empty string), a plain string, or
    a dict, which is flattened to one line of text with
    ``adf_to_oneline_fulltext``.

    Args:
        raw_comment: Raw comment mapping from the Jira response. The keys
            ``author``, ``id`` and ``created`` must be present; ``body`` and
            ``updated`` are optional.

    Returns:
        The parsed JiraComment. ``updated`` is None when the key is absent
        or its value is null.

    Raises:
        KeyError: If ``author``, ``id`` or ``created`` is missing.
        ValueError: If the author cannot be parsed, the body has an
            unexpected type, or a timestamp cannot be parsed.
    """
    author = parse_user(raw_comment["author"], required=True)
    if author is None:
        raise ValueError("Missing author in Jira comment")

    body = raw_comment.get("body")
    if body is None:
        body = ""
    elif isinstance(body, dict):
        body = adf_to_oneline_fulltext(body)
    elif not isinstance(body, str):
        raise ValueError(f"Unexpected body type in Jira comment: {type(body)}")

    comment_id = raw_comment["id"]

    created_raw = raw_comment["created"]
    try:
        # fromisoformat is fed an explicit UTC offset in place of a "Z" suffix.
        created = datetime.fromisoformat(created_raw.replace("Z", "+00:00"))
    except (AttributeError, TypeError, ValueError) as e:
        raise ValueError(
            f"Invalid created timestamp in comment: {created_raw!r}"
        ) from e

    # A present-but-null "updated" is treated like a missing one instead of
    # failing on None.replace().
    updated_raw = raw_comment.get("updated")
    if updated_raw is None:
        updated = None
    else:
        try:
            updated = datetime.fromisoformat(updated_raw.replace("Z", "+00:00"))
        except (AttributeError, TypeError, ValueError) as e:
            raise ValueError(
                f"Invalid updated timestamp in comment: {updated_raw!r}"
            ) from e

    return JiraComment(
        id=comment_id,
        body=body,
        created=created,
        updated=updated,
        author=author,
    )
def parse_issue(raw_issue: dict[str, Any]) -> JiraIssue:
    """Build a JiraIssue from a raw Jira issue payload.

    Validation runs in this order: the ``fields`` object; the required
    ``summary``, ``created``, ``updated`` and ``reporter`` entries; the
    nested ``issuetype.name``, ``status.name`` and ``project.key``; the
    reporter; both timestamps; and finally the top-level ``id`` and ``key``.
    Optional collections (attachments, comments, subtasks, issue links,
    labels) are treated as empty when they are absent, null, or not a list.

    Args:
        raw_issue: Raw issue mapping from the Jira response.

    Returns:
        The parsed JiraIssue. Only outward issue links are collected.

    Raises:
        ValueError: If a required value is missing or malformed, or the
            description is neither a string, a dict, nor None.
    """
    # Gather identifiers early for clearer error messages
    issue_id = raw_issue.get("id")
    issue_key = raw_issue.get("key")
    issue_identifier = issue_key or issue_id or "<unknown>"

    # Validate presence of fields
    fields = raw_issue.get("fields")
    if not isinstance(fields, dict):
        raise ValueError(
            f"Jira issue {issue_identifier} missing required 'fields' object"
        )

    # Validate required top-level keys within fields; a null value counts
    # as missing.
    required_field_keys = ["summary", "created", "updated", "reporter"]
    missing_simple = [k for k in required_field_keys if fields.get(k) is None]
    if missing_simple:
        raise ValueError(
            f"Jira issue {issue_identifier} missing required field(s): {', '.join(missing_simple)}"
        )

    def _require_dict_with_key(
        container: dict[str, Any], outer_key: str, inner_key: str
    ) -> None:
        """Raise unless container[outer_key] is a dict with a non-null inner_key."""
        value = container.get(outer_key)
        if not isinstance(value, dict) or value.get(inner_key) is None:
            raise ValueError(
                f"Jira issue {issue_identifier} missing required '{outer_key}.{inner_key}'"
            )

    def _parse_timestamp(field_name: str) -> datetime:
        """Parse fields[field_name] as an ISO timestamp, accepting a 'Z' suffix."""
        raw_value = fields.get(field_name)
        if not isinstance(raw_value, str):
            raise ValueError(
                f"Jira issue {issue_identifier} missing valid '{field_name}' timestamp"
            )
        try:
            return datetime.fromisoformat(raw_value.replace("Z", "+00:00"))
        except ValueError as e:
            raise ValueError(
                f"Invalid '{field_name}' timestamp for Jira issue {issue_identifier}: {raw_value!r}"
            ) from e

    # Validate nested required keys
    _require_dict_with_key(fields, "issuetype", "name")
    _require_dict_with_key(fields, "status", "name")
    _require_dict_with_key(fields, "project", "key")

    # Parse reporter (required)
    reporter = parse_user(fields.get("reporter"), required=True)
    if reporter is None:
        raise ValueError(
            f"Missing reporter for Jira issue {issue_identifier}: {fields.get('reporter')!r}"
        )

    # Parent key (optional)
    parent = fields.get("parent")
    parent_key = parent.get("key") if isinstance(parent, dict) else None

    # Timestamps with clear error messages
    created_dt = _parse_timestamp("created")
    updated_dt = _parse_timestamp("updated")

    # Attachments: support both 'attachment' and 'attachments'
    raw_attachments = fields.get("attachment")
    if raw_attachments is None:
        raw_attachments = fields.get("attachments")
    attachments_list = raw_attachments if isinstance(raw_attachments, list) else []

    # Comments live under fields.comment.comments
    comment_field = fields.get("comment")
    raw_comments = (
        comment_field.get("comments") if isinstance(comment_field, dict) else None
    )
    comments_list = raw_comments if isinstance(raw_comments, list) else []

    # Subtask keys. The isinstance guard also covers a present-but-null
    # "subtasks", which a .get() default would not.
    raw_subtasks = fields.get("subtasks")
    subtasks_list = raw_subtasks if isinstance(raw_subtasks, list) else []
    subtasks_keys = [
        st.get("key") for st in subtasks_list if isinstance(st, dict) and st.get("key")
    ]

    # Linked issues (outward only), with the same null/non-list guard.
    raw_links = fields.get("issuelinks")
    links_list = raw_links if isinstance(raw_links, list) else []
    outward_issues = (
        link.get("outwardIssue") for link in links_list if isinstance(link, dict)
    )
    linked_outward = [
        outward["key"]
        for outward in outward_issues
        if isinstance(outward, dict) and outward.get("key")
    ]

    # Optional fields
    priority = fields.get("priority")
    priority_name = priority.get("name") if isinstance(priority, dict) else None

    labels = fields.get("labels")
    labels_list = labels if isinstance(labels, list) else []

    # Validate id/key presence for the model
    if not issue_id or not issue_key:
        raise ValueError(
            f"Jira issue missing required top-level identifier(s): id={issue_id!r}, key={issue_key!r}"
        )

    # Description may be a plain string, a dict (flattened to text), or None.
    description = fields.get("description", "")
    if isinstance(description, dict):
        description = adf_to_oneline_fulltext(description)
    elif description is not None and not isinstance(description, str):
        raise ValueError(
            f"Unexpected description type for Jira issue {issue_identifier}: {type(description)}"
        )

    return JiraIssue(
        id=issue_id,
        key=issue_key,
        summary=str(fields.get("summary")),
        description=description,
        # The nested lookups below were validated by _require_dict_with_key.
        issue_type=fields["issuetype"]["name"],
        status=fields["status"]["name"],
        priority=priority_name,
        project_key=fields["project"]["key"],
        created=created_dt,
        updated=updated_dt,
        reporter=reporter,
        assignee=parse_user(fields.get("assignee")),
        labels=labels_list,
        attachments=[
            parse_attachment(att) for att in attachments_list if isinstance(att, dict)
        ],
        comments=[
            parse_comment(comment)
            for comment in comments_list
            if isinstance(comment, dict)
        ],
        parent_key=parent_key,
        subtasks=subtasks_keys,
        linked_issues=linked_outward,
    )
def adf_to_oneline_fulltext(node: dict) -> str:
    """Flatten a nested Jira ADF structure into one line of plain text.

    Every string stored under a ``"text"`` key of any nested dict is
    collected in traversal order; the pieces are joined with spaces and all
    runs of whitespace are collapsed to a single space.

    Args:
        node: Root of the nested dict/list structure to flatten.

    Returns:
        The collected text on a single line, or an empty string when no
        text is found.
    """

    def _walk(item: Any):
        """Yield text fragments depth-first, a dict's own text before its children."""
        if isinstance(item, list):
            for child in item:
                yield from _walk(child)
        elif isinstance(item, dict):
            own_text = item.get("text")
            if isinstance(own_text, str):
                yield own_text
            # The "text" entry itself is never descended into.
            for name, child in item.items():
                if name != "text":
                    yield from _walk(child)

    joined = " ".join(_walk(node))
    return " ".join(joined.split())