Coverage for src/qdrant_loader/connectors/git/adapter.py: 93%

61 statements  

« prev     ^ index     » next       coverage.py v7.9.1, created at 2025-06-18 09:27 +0000

1"""Adapter for GitPython operations.""" 

2 

3import os 

4import time 

5from datetime import datetime 

6 

7import git 

8 

9from qdrant_loader.utils.logging import LoggingConfig 

10 

11logger = LoggingConfig.get_logger(__name__) 

12 

13 

class GitPythonAdapter:
    """Adapter for GitPython operations.

    Wraps a ``git.Repo`` instance and exposes the handful of operations the
    connector needs: cloning, reading a file at ``HEAD``, looking up the last
    commit date of a file, and listing tracked files.
    """

    def __init__(self, repo: git.Repo | None = None) -> None:
        """Initialize the adapter.

        Args:
            repo: Git repository instance. May be ``None``; ``clone`` sets it
                after a successful clone.
        """
        self.repo = repo
        self.logger = LoggingConfig.get_logger(__name__)

    def clone(
        self,
        url: str,
        to_path: str,
        branch: str,
        depth: int,
        *,
        max_retries: int = 3,
        retry_delay: float = 2,
    ) -> None:
        """Clone a Git repository, retrying on failure.

        Args:
            url (str): Repository URL
            to_path (str): Local path to clone to
            branch (str): Branch to clone
            depth (int): Clone depth (use 0 for full history)
            max_retries (int, optional): Total number of clone attempts.
                Defaults to 3.
            retry_delay (float, optional): Seconds to sleep between attempts.
                Defaults to 2.

        Raises:
            ValueError: If ``max_retries`` is smaller than 1.
            Exception: The error of the last attempt is re-raised once all
                attempts have failed.
        """
        if max_retries < 1:
            # Without this guard the loop below would not run at all and the
            # method would return as if the clone had succeeded.
            raise ValueError("max_retries must be at least 1")

        # The git options do not change between attempts, so build them once.
        clone_args = ["--branch", branch]
        if depth > 0:
            clone_args.extend(["--depth", str(depth)])

        for attempt in range(1, max_retries + 1):
            try:
                self._clone_once(url, to_path, clone_args)
                return
            except Exception as e:
                if attempt < max_retries:
                    self.logger.warning(
                        f"Clone attempt {attempt} failed: {e}. Retrying in {retry_delay} seconds..."
                    )
                    time.sleep(retry_delay)
                else:
                    self.logger.error(
                        f"Failed to clone repository after {max_retries} attempts: {e}"
                    )
                    raise

    def _clone_once(self, url: str, to_path: str, clone_args: list[str]) -> None:
        """Run a single clone attempt with credential prompts disabled."""
        # Store original value and disable credential prompts
        original_prompt = os.environ.get("GIT_TERMINAL_PROMPT")
        os.environ["GIT_TERMINAL_PROMPT"] = "0"
        try:
            self.repo = git.Repo.clone_from(url, to_path, multi_options=clone_args)
            self.logger.info(
                f"Successfully cloned repository from {url} to {to_path}"
            )
        finally:
            # Restore original value. pop() with a default cannot raise, so a
            # variable that vanished in the meantime never masks a clone error.
            if original_prompt is None:
                os.environ.pop("GIT_TERMINAL_PROMPT", None)
            else:
                os.environ["GIT_TERMINAL_PROMPT"] = original_prompt

    def get_file_content(self, file_path: str) -> str:
        """Get file content as stored at ``HEAD``.

        Args:
            file_path (str): Path to the file

        Returns:
            str: File content

        Raises:
            ValueError: If no repository has been set on the adapter.
            Exception: Any error raised by ``git show`` is logged and re-raised.
        """
        try:
            if not self.repo:
                raise ValueError("Repository not initialized")
            return self.repo.git.show(f"HEAD:{file_path}")
        except Exception as e:
            self.logger.error(f"Failed to read file {file_path}: {e}")
            raise

    def get_last_commit_date(self, file_path: str) -> datetime | None:
        """Get the last commit date for a file.

        The repository is located from the file's directory (searching parent
        directories), not taken from ``self.repo``.

        Args:
            file_path (str): Path to the file

        Returns:
            Optional[datetime]: Last commit date, or None if the file has no
            commits or any error occurs (errors are logged, not raised).
        """
        try:
            repo = git.Repo(os.path.dirname(file_path), search_parent_directories=True)
            try:
                last_commit = next(
                    iter(repo.iter_commits(paths=file_path, max_count=1)), None
                )
                if last_commit is None:
                    return None
                # Read the date while the repository is still open.
                return last_commit.committed_datetime
            finally:
                # This Repo is opened per call; close it so its resources are
                # released instead of accumulating across many files.
                repo.close()
        except Exception as e:
            self.logger.error(f"Failed to get last commit date for {file_path}: {e}")
            return None

    def list_files(self, path: str = ".") -> list[str]:
        """List all files in the repository at ``HEAD``.

        Args:
            path (str, optional): Path to list files from. Defaults to ".".

        Returns:
            List[str]: List of file paths

        Raises:
            ValueError: If no repository has been set on the adapter.
            Exception: Any error raised by ``git ls-tree`` is logged and
                re-raised.
        """
        try:
            if not self.repo:
                raise ValueError("Repository not initialized")

            # Use git ls-tree to list all files. "-z" makes git emit
            # NUL-terminated, unquoted names; without it, names containing
            # non-ASCII or other unusual characters come back C-quoted.
            output = self.repo.git.ls_tree("-r", "--name-only", "-z", "HEAD", path)
            if not output:
                return []
            return [name for name in output.split("\0") if name]
        except Exception as e:
            self.logger.error(f"Failed to list files: {e}")
            raise