diff --git a/lib/git-base-rc-tag.sh b/lib/git-base-rc-tag.sh index 75cf14d039e2b18abfa869f6c38961aaacab8af2..a5e1c00e65ca81f07aa0de1b8ab3c87dbaaa44e5 100644 --- a/lib/git-base-rc-tag.sh +++ b/lib/git-base-rc-tag.sh @@ -103,15 +103,15 @@ main() { return 1 fi - # 检查是否为Git仓库 - if ! git -C "$work_dir" rev-parse --is-inside-work-tree >/dev/null 2>&1; then + # 检查是否为Git仓库(支持普通仓库和bare仓库) + if ! git -C "$work_dir" rev-parse --git-dir >/dev/null 2>&1; then echo "错误: '$work_dir' 不是Git仓库" >&2 return 1 fi if [ "$VERBOSE" = "true" ]; then echo "[DEBUG] Git仓库验证成功" >&2 - echo "[DEBUG] 仓库根目录: $(git -C "$work_dir" rev-parse --show-toplevel)" >&2 + echo "[DEBUG] 仓库路径: $(git -C "$work_dir" rev-parse --git-dir)" >&2 fi # 执行查找 diff --git a/programs/bisect-py/jobs/auto_test/kernel-selftests.yaml b/programs/bisect-py/jobs/auto_test/kernel-selftests.yaml index f391e127e1a439221547400e90f1ec58d417953b..2f57a3e540a7416e1181dc41aafd0d41df0790ef 100644 --- a/programs/bisect-py/jobs/auto_test/kernel-selftests.yaml +++ b/programs/bisect-py/jobs/auto_test/kernel-selftests.yaml @@ -6,7 +6,6 @@ need_memory: 2G need_cpu: 2 testbox: - vm-2p8g - - taishan200-2280-2s64p-128g--a1003 - taishan200-2280-2s48p-512g--a1320 ss: kernel-selftests: @@ -39,7 +38,6 @@ kernel_cmdline: erst_disable need_memory: 3G testbox: - vm-2p8g - - taishan200-2280-2s64p-128g--a1003 - taishan200-2280-2s48p-512g--a1320 program.kernel-selftests: group: @@ -53,7 +51,6 @@ kernel_cmdline: sysctl.debug.test_sysctl.boot_int=1 --- testbox: - vm-2p8g - - taishan200-2280-2s64p-128g--a1003 - taishan200-2280-2s48p-512g--a1320 program.kernel-selftests: @@ -67,7 +64,6 @@ kernel_cmdline: kvm-intel.unrestricted_guest=0 --- testbox: - vm-2p8g - - taishan200-2280-2s64p-128g--a1003 - taishan200-2280-2s48p-512g--a1320 program.kernel-selftests: diff --git a/programs/bisect-py/jobs/auto_test/linux.yaml b/programs/bisect-py/jobs/auto_test/linux.yaml deleted file mode 100644 index 358fb18317a2c8d97dbf4f762e8f016c67939afc..0000000000000000000000000000000000000000 --- a/programs/bisect-py/jobs/auto_test/linux.yaml +++ /dev/null @@ -1,8 +0,0 @@ -suite: makepkg -testbox: dc-16g -program: - makepkg: - commit: b7d4e259682caccb51a25283655f2c8f02e32d23 - config: randconfig-manual-test3 - _url: git+https://mirrors.tuna.tsinghua.edu.cn/git/linux-next.git - project: linux diff --git a/programs/bisect-py/jobs/auto_test/ltp-syscall_linux.yaml b/programs/bisect-py/jobs/auto_test/ltp-syscall_linux.yaml deleted file mode 100644 index fb83d1babc191aab9310200fe5c4e3b64c8b4e14..0000000000000000000000000000000000000000 --- a/programs/bisect-py/jobs/auto_test/ltp-syscall_linux.yaml +++ /dev/null @@ -1,32 +0,0 @@ -debug_lkp: "2" -suite: ltp -category: functional -need_memory: 4G -setup.disk: 1HDD -# setup.fs: -# - ext4 -testbox: vm-2p8g -os: openeuler - -ss: - ltp: - commit: e8e8f21729d - _url: git+https://gitee.com/zhengzengkai/ltp.git - -ss: - linux: - commit: fd00be9afa1d - config: ./openeuler_defconfig_arm64 - _url: git+https://mirrors.tuna.tsinghua.edu.cn/git/linux-next.git - project: linux - -program.ltp: - test: - - syscalls-00 - - syscalls-01 - - syscalls-02 - - syscalls-03 - - syscalls-04 - - syscalls-05 - - syscalls-06 - - syscalls-07 diff --git a/programs/bisect-py/jobs/auto_test/ltp.yaml b/programs/bisect-py/jobs/auto_test/ltp.yaml index 2b995a83eb914094d423a83cd2971985b8764b3d..3d37c81e01007c26e45424889190bee5a0656057 100644 --- a/programs/bisect-py/jobs/auto_test/ltp.yaml +++ b/programs/bisect-py/jobs/auto_test/ltp.yaml @@ -7,7 +7,6 @@ setup.disk: 1HDD # - ext4 testbox: - vm-2p8g - # - taishan200-2280-2s64p-128g--a1003 # - taishan200-2280-2s48p-512g--a1320 os: openeuler diff --git a/programs/bisect-py/jobs/auto_test/trinity.yaml b/programs/bisect-py/jobs/auto_test/trinity.yaml index fe0121608ea7aaa4fb3750a89dea1d2ebf30573e..6c3ffd18ab72d69a8939d29816b1b051327e6653 100644 --- a/programs/bisect-py/jobs/auto_test/trinity.yaml +++ b/programs/bisect-py/jobs/auto_test/trinity.yaml @@ -4,7 +4,7 @@ category: functional need_memory: 4G testbox: - vm-2p8g - - taishan200-2280-2s48p-512g--a1322 + - taishan200-2280-2s48p-512g--a1322 os: openeuler ss: diff --git a/programs/bisect-py/jobs/auto_test/xfstest.yaml b/programs/bisect-py/jobs/auto_test/xfstest.yaml index 3bdb200a8c668c6770c2ebb807ab69a6bc4b8b1f..36fa876f987b86105d82ed7d31f859cd4fd01f93 100644 --- a/programs/bisect-py/jobs/auto_test/xfstest.yaml +++ b/programs/bisect-py/jobs/auto_test/xfstest.yaml @@ -7,7 +7,7 @@ setup.fs: - ext4 testbox: - vm-2p8g - #- taishan200-2280-2s48p-512g--a1322 + #- taishan200-2280-2s48p-512g--a1320 os: openeuler ss: diff --git a/programs/bisect-py/kernel-ci/linux_auto_test.py b/programs/bisect-py/kernel-ci/linux_auto_test.py index ee919f84622b5772a885cdf89057c686af6a5288..1b7d3f674576bb4527cad3f9355732c03fc16a83 100644 --- a/programs/bisect-py/kernel-ci/linux_auto_test.py +++ b/programs/bisect-py/kernel-ci/linux_auto_test.py @@ -453,7 +453,7 @@ def main(): commit_to_use = head_commit try: - job_id, _, _ = gb.submit_job(functional_job) + job_id, _, _ = gb.submit_job(functional_job, force=True) logger.info(f"Regular test job submitted: JobID={job_id} ({commit_to_use[:7] if isinstance(commit_to_use, str) else str(commit_to_use)})") log_summary(job_id, 'functional', commit_to_use if isinstance(commit_to_use, str) else head_commit) return 0 @@ -473,7 +473,7 @@ def main(): # baseline and current jobs separately logger.info(f"Performance test using commit set via override: {override_commit}") try: - job_id, _, _ = gb.submit_job(performance_job) + job_id, _, _ = gb.submit_job(performance_job, force=True) logger.info(f"Performance test job submitted: JobID={job_id} (commit: {override_commit[:7] if isinstance(override_commit, str) else str(override_commit)})") # Print JobID in a parseable format for shell script print(f"JobID={job_id}") @@ -498,12 +498,12 @@ def main(): # Submit baseline job baseline_job = deepcopy(template) baseline_job = update_job_commit(baseline_job, base_commit) - base_info = gb.submit_job(baseline_job) + base_info = gb.submit_job(baseline_job, force=True) # Submit test job test_job = deepcopy(template) test_job = update_job_commit(test_job, head_commit) - test_info = gb.submit_job(test_job) + test_info = gb.submit_job(test_job, force=True) if base_info and test_info: base_job_id, _ = base_info diff --git a/programs/bisect-py/py_bisect.py b/programs/bisect-py/py_bisect.py index 22965c96289850d91cb823247f7eec435bd036f9..550ef3abf500d50eb91e93fce5272d0ad835fca2 100644 --- a/programs/bisect-py/py_bisect.py +++ b/programs/bisect-py/py_bisect.py @@ -65,6 +65,28 @@ class VerificationTimeoutError(VerificationError): """Raised when verification takes too long""" pass +class GitOperationError(BisectError): + """Git操作相关错误""" + pass + +class CloneError(GitOperationError): + """Git克隆错误""" + def __init__(self, url, retries): + self.repo_url = url + self.retry_count = retries + super().__init__(f"Failed to clone {url} after {retries} retries") + +class CheckoutError(GitOperationError): + """Git检出错误""" + def __init__(self, commit, repo_path): + self.commit = commit + self.repo_path = repo_path + super().__init__(f"Checkout failed for {commit} in {repo_path}") + +class InvalidCommitError(GitOperationError): + """Invalid Git commit hash error""" + pass + # 可重试错误类型定义 RETRYABLE_ERRORS = ( JobSubmissionError, # 作业提交失败 @@ -176,7 +198,62 @@ class GitBisect: if verification_timeout < 60: raise ValueError("BISECT_VERIFICATION_TIMEOUT must be at least 60 seconds") + def _extract_lkp_md5(self, job_id: str) -> Optional[str]: + """ + Extract LKP MD5 from job info for environment reproducibility tracking. + + Args: + job_id: Job ID to query + + Returns: + LKP MD5 string or None if not found + """ + try: + job_info = self.bisect_db.get_job_info(job_id) + if job_info: + pkg_data = job_info.get('pkg_data', {}) + if isinstance(pkg_data, dict): + lkp_tests_info = pkg_data.get('lkp-tests', {}) + if isinstance(lkp_tests_info, dict): + return lkp_tests_info.get('md5') + except Exception as e: + self.logger.debug(f"Failed to extract lkp_md5: {e}") + return None + + def _calculate_job_reuse_stats(self) -> tuple: + """ + Calculate job reuse statistics from commit_jobs.jsonl. + + Returns: + Tuple of (job_request_count, job_reused_count, job_reused_rate) + """ + job_request_count = 0 + job_reused_count = 0 + commit_jobs_path = os.path.join(self.temp_result_root, 'commit_jobs.jsonl') + + if os.path.exists(commit_jobs_path): + try: + with open(commit_jobs_path, 'r', encoding='utf-8') as f: + for line in f: + try: + record = json.loads(line.strip()) + job_request_count += 1 + if record.get('is_reused', False): + job_reused_count += 1 + except json.JSONDecodeError: + continue + except Exception as e: + self.logger.warning(f"Failed to calculate reuse stats from commit_jobs.jsonl: {e}") + # Fallback to instance variables if file reading fails + job_request_count = self.job_request_count + job_reused_count = self.job_reused_count + else: + # Fallback to instance variables if file doesn't exist + job_request_count = self.job_request_count + job_reused_count = self.job_reused_count + job_reused_rate = job_reused_count / job_request_count if job_request_count > 0 else 0.0 + return job_request_count, job_reused_count, job_reused_rate def find_first_bad_commit(self, task: Dict[str, Any], repo_dir: Optional[str] = None) -> Optional[Dict[str, Any]]: """ @@ -457,10 +534,35 @@ class GitBisect: except Exception as e: self.logger.error(f"Failed to save complete bisect log: {str(e)}") + def _is_bare_repo(self, repo_path: str) -> bool: + """ + Check if the repository is a bare repository + + Args: + repo_path: Path to the git repository + + Returns: + True if bare repository, False otherwise + """ + try: + result = subprocess.run( + ['git', '-C', repo_path, 'rev-parse', '--is-bare-repository'], + capture_output=True, + text=True, + check=True, + timeout=10 + ) + return result.stdout.strip().lower() == 'true' + except Exception: + return False + def _reset_repo_to_clean_state(self, repo_path: str) -> bool: """ Reset repository to clean state before bisect + For bare repositories, only reset bisect state (no working directory to clean). + For non-bare repositories, also reset working directory and clean untracked files. + Args: repo_path: Path to the git repository @@ -468,7 +570,9 @@ class GitBisect: True if successful, False otherwise """ try: - # 1. Stop any ongoing bisect + is_bare = self._is_bare_repo(repo_path) + + # 1. Stop any ongoing bisect (works for both bare and non-bare) try: subprocess.run( ['git', '-C', repo_path, 'bisect', 'reset'], @@ -481,19 +585,24 @@ class GitBisect: # Ignore if no bisect is running pass - # 2. Reset to HEAD, discarding all changes + # For bare repositories, no working directory cleanup needed + if is_bare: + self.logger.info(f"Bare repository detected, skipping working directory cleanup: {repo_path}") + return True + + # 2. Reset to HEAD, discarding all changes (non-bare only) reset_cmd = ['git', '-C', repo_path, 'reset', '--hard', 'HEAD'] - result = subprocess.run(reset_cmd, capture_output=True, text=True, check=True, timeout=60) + subprocess.run(reset_cmd, capture_output=True, text=True, check=True, timeout=60) self.logger.info(f"Reset repository to clean state: {repo_path}") - # 3. Clean untracked files and directories + # 3. Clean untracked files and directories (non-bare only) clean_cmd = ['git', '-C', repo_path, 'clean', '-fd'] - result = subprocess.run(clean_cmd, capture_output=True, text=True, check=True, timeout=60) + subprocess.run(clean_cmd, capture_output=True, text=True, check=True, timeout=60) self.logger.info(f"Cleaned untracked files in {repo_path}") return True - except subprocess.TimeoutExpired as e: + except subprocess.TimeoutExpired: self.logger.error(f"Timeout while resetting repository: {repo_path}") return False except subprocess.CalledProcessError as e: @@ -575,6 +684,12 @@ class GitBisect: self.temp_result_root = result_root self.temp_git_base = os.path.join(result_root, 'git_repos') + # 只在主 bisect 进程中清理文件,bisect_steps.py 等子进程不清理 + # 通过检查 BISECT_HEAD 环境变量判断是否在 git bisect run 子进程中 + in_bisect_run = os.environ.get('GIT_DIR') is not None or os.path.exists(os.path.join(result_root, 'commit_jobs.jsonl')) + if not in_bisect_run: + self._cleanup_previous_session_files(result_root) + # 确保git目录存在 os.makedirs(self.temp_git_base, exist_ok=True) @@ -596,6 +711,39 @@ class GitBisect: self.logger.info("Bisect session started", task=task_info) self.logger.info("Using HTTP-based database client") + def _cleanup_previous_session_files(self, result_root: str) -> None: + """ + 清理上次 bisect 会话遗留的状态文件 + + 在新的 bisect 开始前清理旧文件,防止: + 1. 旧的 commit_jobs.jsonl 影响新的 bisect 判断 + 2. 旧的可视化结果误导用户 + + Args: + result_root: bisect 结果目录 + """ + # 需要清理的文件列表(不清理 *.log,避免删除我们自己的日志) + files_to_clean = [ + 'commit_jobs.jsonl', # 提交测试记录 + 'bisect_visualization.txt', # 可视化结果 + 'bisect_summary.json', # 摘要 + ] + + cleaned_count = 0 + + for filename in files_to_clean: + filepath = os.path.join(result_root, filename) + if os.path.exists(filepath): + try: + os.remove(filepath) + cleaned_count += 1 + except OSError: + pass # 忽略删除失败,不阻塞启动 + + if cleaned_count > 0: + # 注意:此时 logger 尚未初始化,使用 print 输出 + print(f"[bisect] Cleaned {cleaned_count} files from previous session in {result_root}") + def _detect_build_task(self, job_dict: dict) -> bool: """ Detect if a job is a build/compile task based on its configuration. @@ -664,7 +812,7 @@ class GitBisect: self.bad_job = self.init_job_content(self.bad_job_id) # 保存bad_job的原始health状态,用于后续bisect状态判断 self.bad_job_health = self.bad_job.get('job_health', 'unknown') - self.bad_job.pop('job_health') + self.bad_job.pop('job_health', None) # Use default to avoid KeyError if key doesn't exist self.logger.info(f"Bad job health status: {self.bad_job_health}") except ValueError as e: self.logger.error(f"Invalid job ID - original value: {self.bad_job_id}") @@ -1416,6 +1564,9 @@ class GitBisect: job_id, result_root, is_reused = self.submit_job(job_copy) job_stats, job_health = self._poll_job_stats(job_id, result_root) + # Extract LKP MD5 from job info for environment reproducibility tracking + lkp_md5 = self._extract_lkp_md5(job_id) + # Extract metric value if this is a performance bisect metric_value = None if self.metric and self.metric in job_stats: @@ -1424,9 +1575,6 @@ class GitBisect: except (ValueError, TypeError): pass - # Record the job with performance value if available - self.record_jobs((job_id, result_root), commit, job_health, metric_value, is_reused=is_reused) - # 判断逻辑 if self.metric: # 性能 bisect @@ -1435,25 +1583,40 @@ class GitBisect: if metric_value is not None: if (metric_value - self.mid_point) * self.direction > 0: self.logger.info(f"Commit {commit[:8]} is good (performance: {metric_value:.2f})") + self.record_jobs((job_id, result_root), commit, 'good', metric_value, is_reused=is_reused, + certainty='100%', decision_reason='perf_above_threshold', lkp_md5=lkp_md5) return 'good' else: self.logger.info(f"Commit {commit[:8]} is bad (performance: {metric_value:.2f})") + self.record_jobs((job_id, result_root), commit, 'bad', metric_value, is_reused=is_reused, + certainty='100%', decision_reason='perf_below_threshold', lkp_md5=lkp_md5) return 'bad' else: self.logger.warning(f"Metric {self.metric} not found in job stats") + self.record_jobs((job_id, result_root), commit, 'skip', metric_value, is_reused=is_reused, + certainty='uncertain', decision_reason='metric_not_found', lkp_md5=lkp_md5) return 'skip' else: # 回退到 job_health 判断(仅检查功能性问题) if job_health == 'success': self.logger.info(f"Commit {commit[:8]} is good (job_health=success)") + self.record_jobs((job_id, result_root), commit, 'good', metric_value, is_reused=is_reused, + certainty='100%', decision_reason='job_health_success', lkp_md5=lkp_md5) return 'good' else: self.logger.info(f"Commit {commit[:8]} is bad (job_health={job_health})") + self.record_jobs((job_id, result_root), commit, 'bad', metric_value, is_reused=is_reused, + certainty='100%', decision_reason=f'job_health_{job_health}', lkp_md5=lkp_md5) return 'bad' else: - # 错误 bisect,检查是否包含指定的 error_id - status = self._check_error_id(job_stats, self.error_id, job_health, result_root) - self.logger.info(f"Commit {commit[:8]} status: {status}") + # 错误 bisect:检查是否包含指定的 error_id + # 不再强制要求 job_health == success,因为: + # 1. 任务可能因为其他无关错误失败,但不包含目标 error_id + # 2. 这种情况下应该视为 good(目标错误未出现) + status, certainty, reason = self._check_error_id(job_stats, self.error_id, job_health, result_root) + self.logger.info(f"Commit {commit[:8]} status: {status} (job_health={job_health}, certainty={certainty}, reason={reason})") + self.record_jobs((job_id, result_root), commit, status, metric_value, is_reused=is_reused, + certainty=certainty, decision_reason=reason, lkp_md5=lkp_md5) return status except Exception as e: @@ -1947,7 +2110,7 @@ class GitBisect: self.logger.error(f"Error checking existing completed jobs: {str(e)}") return [] - def record_jobs(self, job_info: list, job_commit: str, status: str, metric_value: Optional[float] = None, is_reused: bool = False) -> None: + def record_jobs(self, job_info: list, job_commit: str, status: str, metric_value: Optional[float] = None, is_reused: bool = False, certainty: str = "100%", decision_reason: str = None, lkp_md5: str = None) -> None: """ Record job information in JSON Lines format. Args: @@ -1956,6 +2119,9 @@ class GitBisect: status: Job status ('good', 'bad', 'skip') metric_value: Optional performance metric value (for performance bisect) is_reused: Whether the job was reused from existing results + certainty: Confidence level of the decision ("100%" or "uncertain") + decision_reason: Reason for the decision (e.g., "errid_found", "job_success", "unrelated_failure") + lkp_md5: MD5 hash of the LKP test environment for reproducibility tracking """ # Create result directory if not exists [1] os.makedirs(self.temp_result_root, exist_ok=True) @@ -1970,9 +2136,18 @@ class GitBisect: "job_result_root": job_info[1], "status": status, "is_reused": is_reused, + "certainty": certainty, "timestamp": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) } + # Add decision reason if provided + if decision_reason: + record["decision_reason"] = decision_reason + + # Add LKP MD5 for environment reproducibility tracking + if lkp_md5: + record["lkp_md5"] = lkp_md5 + # For performance bisect, include the metric value if metric_value is not None: record["metric_value"] = metric_value @@ -1997,6 +2172,9 @@ class GitBisect: Returns: tuple: (job_id, result_root, is_reused) for the job (existing or new) + + Raises: + JobSubmissionError: If job submission fails """ try: self.job_request_count += 1 @@ -2024,12 +2202,19 @@ class GitBisect: self.logger.info(f"Not enough completed jobs found ({completed_jobs_count}/{success_limit}), submitting a new job.") job_info = self._submit_new_job(job_copy) - if not job_info: - raise JobSubmissionError("Job submission failed") + if not job_info or len(job_info) < 2: + raise JobSubmissionError("Job submission failed: _submit_new_job returned invalid result") - self.logger.info(f"Successfully submitted new job: {job_info[0]}") + job_id, result_root = job_info[0], job_info[1] + if not job_id: + raise JobSubmissionError("Job submission failed: job_id is None or empty") + + self.logger.info(f"Successfully submitted new job: {job_id}") # Return (job_id, result_root, is_reused=False) - return job_info[0], job_info[1], False + return job_id, result_root, False + except Exception as e: + self.logger.error(f"submit_job failed: {type(e).__name__}: {str(e)}") + raise finally: self._submitting_job = False @@ -2071,16 +2256,22 @@ class GitBisect: self.logger.info(f"Generated temporary job file: {temp_yaml_path}") # Submit the job and wait for the response - # Use --no-pack to skip LKP_SRC delta packing (not needed for bisect) lkp_src = os.environ['LKP_SRC'] + # Prepare clean environment for submit + # git bisect sets GIT_DIR/GIT_WORK_TREE which interferes with git operations + # in other repositories (like LKP_SRC), so we need to clear them + submit_env = os.environ.copy() + submit_env.pop('GIT_DIR', None) + submit_env.pop('GIT_WORK_TREE', None) + # Use subprocess.run with explicit cwd to ensure proper working directory result = subprocess.run( [f"{lkp_src}/sbin/submit", temp_yaml_path], cwd=lkp_src, capture_output=True, text=True, - env=os.environ.copy() + env=submit_env ) retcode = result.returncode response = result.stdout + result.stderr @@ -2149,7 +2340,8 @@ class GitBisect: self.logger.info('wait for status') """Wait for job completion and determine status""" job_stats, job_health = self._poll_job_stats(job_id, result_root) - return self._determine_status(job_stats, job_health, result_root, metric_criteria) + status, certainty, reason = self._determine_status(job_stats, job_health, result_root, metric_criteria) + return status def submit_wait_job_status(self, job: Dict[str, Any], stat: str) -> str: """ @@ -2182,7 +2374,10 @@ class GitBisect: # Submit job and wait for status job_id, result_root, is_reused = self.submit_job(job) job_stats, job_health = self._poll_job_stats(job_id, result_root) - status = self._determine_status(job_stats, job_health, result_root, criteria) + status, certainty, reason = self._determine_status(job_stats, job_health, result_root, criteria) + + # Extract LKP MD5 from job info for environment reproducibility tracking + lkp_md5 = self._extract_lkp_md5(job_id) # Extract metric value if this is a performance bisect metric_value = None @@ -2192,11 +2387,12 @@ class GitBisect: except (ValueError, TypeError): pass - # Record results with performance value if available + # Record results with performance value, certainty, reason, and lkp_md5 commit = self._get_commit_from_job(job) - self.record_jobs((job_id, result_root), commit, status, metric_value, is_reused=is_reused) + self.record_jobs((job_id, result_root), commit, status, metric_value, is_reused=is_reused, + certainty=certainty, decision_reason=reason, lkp_md5=lkp_md5) - self.logger.info(f"Final status: {status}") + self.logger.info(f"Final status: {status} (certainty={certainty}, reason={reason})") return status except (JobSubmissionError, JobStatusTimeoutError) as e: @@ -2298,23 +2494,32 @@ class GitBisect: raise JobStatusTimeoutError(f"Job {job_id} exceeded maximum wait time") - def _determine_status(self, stats: dict, job_health: str, result_root: str, criteria: dict) -> str: - """Determine job status based on criteria""" + def _determine_status(self, stats: dict, job_health: str, result_root: str, criteria: dict) -> tuple: + """ + Determine job status based on criteria + + Returns: + tuple: (status, certainty, reason) + """ try: if criteria['type'] == 'perf': - status = self._check_perf_metric(stats, criteria['metric']) + status, certainty, reason = self._check_perf_metric(stats, criteria['metric']) self.logger.debug("Performance status determined", status=status, + certainty=certainty, + reason=reason, metric=criteria['metric'], stats=stats) - return status + return status, certainty, reason - status = self._check_error_id(stats, criteria['error_id'], job_health, result_root) + status, certainty, reason = self._check_error_id(stats, criteria['error_id'], job_health, result_root) self.logger.debug("Error ID status determined", status=status, + certainty=certainty, + reason=reason, error_id=criteria['error_id'], stats=stats) - return status + return status, certainty, reason except Exception as e: self.logger.error("Status determination failed", @@ -2324,18 +2529,21 @@ class GitBisect: stats=stats) raise - def _check_perf_metric(self, stats, metric) -> str: + def _check_perf_metric(self, stats, metric) -> tuple: """ 使用mid_point + direction逻辑评估性能指标状态 逻辑: - direction = -1 (lower is better): measured < mid_point → GOOD - direction = 1 (higher is better): measured > mid_point → GOOD + + Returns: + tuple: (status, certainty, reason) """ value = stats.get(metric) if value is None: self.logger.error(f"Metric {metric} not found in job stats") - return 'skip' + return ('skip', 'uncertain', 'metric_not_found') try: measured = float(value) @@ -2343,7 +2551,7 @@ class GitBisect: # 检查 mid_point 和 direction 是否已设置 if self.mid_point is None or self.direction is None: self.logger.error("mid_point or direction not set for performance evaluation") - return 'skip' + return ('skip', 'uncertain', 'midpoint_not_set') self.logger.info(f"Performance check: measured={measured}, mid_point={self.mid_point}, direction={self.direction}") @@ -2351,14 +2559,14 @@ class GitBisect: # (measured - mid_point) * direction > 0 表示朝着好的方向 if (measured - self.mid_point) * self.direction > 0: self.logger.info("Performance is on the good side → GOOD") - return 'good' + return ('good', '100%', 'perf_above_threshold') else: self.logger.info("Performance is on the bad side → BAD") - return 'bad' + return ('bad', '100%', 'perf_below_threshold') except (ValueError, TypeError) as e: self.logger.error(f"Invalid metric value {value} for {metric}: {e}") - return 'skip' + return ('skip', 'uncertain', 'invalid_metric_value') def _was_file_compiled(self, errid: str, result_root: str) -> Optional[bool]: """ @@ -2455,7 +2663,7 @@ class GitBisect: self.logger.warning(f"Timeout waiting for build log after {timeout}s") return False - def _check_error_id(self, stats: dict, error_id: str, current_job_health: str, result_root: str) -> str: + def _check_error_id(self, stats: dict, error_id: str, current_job_health: str, result_root: str) -> tuple: """ Determines the bisect status by combining errid presence, job health, build stage progress, and build time analysis. @@ -2467,19 +2675,22 @@ class GitBisect: result_root: The path to the job's result directory for log inspection. Returns: - 'bad', 'good', or 'skip'. + tuple: (status, certainty, reason) + - status: 'bad', 'good', or 'skip' + - certainty: '100%' for definitive decisions, 'uncertain' for heuristic decisions + - reason: explanation of the decision """ errid_found = self._has_error_id(stats, error_id) # Rule 1: If the error ID is found, the commit is definitively bad. if errid_found: self.logger.info(f"errid '{error_id}' found. Result: bad") - return 'bad' + return ('bad', '100%', 'errid_found') # Rule 2: If the job was successful and the error ID was not found, the commit is good. if current_job_health == 'success': self.logger.info(f"errid not found and job_health is 'success'. Result: good") - return 'good' + return ('good', '100%', 'job_success_no_errid') # At this point, the job has failed for other reasons, and the target errid was not found. @@ -2492,7 +2703,7 @@ class GitBisect: if not self.is_build_task: self.logger.info(f"errid not found, job failed, but this is a test task (not build). Result: good") self.logger.debug(f"Test task failures without target errid don't affect bisect result") - return 'good' + return ('good', 'uncertain', 'unrelated_test_failure') # Rule 4: For build/compile tasks, use enhanced analysis # Build tasks need more careful analysis because: @@ -2501,7 +2712,7 @@ class GitBisect: # - Use build_stage and build_time to assess build progress return self._check_build_task_failure(error_id, stats, result_root) - def _check_build_task_failure(self, error_id: str, stats: dict, result_root: str) -> str: + def _check_build_task_failure(self, error_id: str, stats: dict, result_root: str) -> tuple: """ Enhanced build task failure analysis using build_stage and build_time. @@ -2518,14 +2729,17 @@ class GitBisect: result_root: Path to job results Returns: - 'good', 'bad', or 'skip' + tuple: (status, certainty, reason) + - status: 'good', 'bad', or 'skip' + - certainty: '100%' for definitive decisions, 'uncertain' for heuristic decisions + - reason: explanation of the decision """ # GUARD: This method should ONLY be called for build tasks # If somehow called for a test task, return 'good' immediately if not self.is_build_task: self.logger.warning(f"_check_build_task_failure called for non-build task! Treating as test task: returning 'good'") self.logger.debug(f"Task detection: suite={self.bad_job.get('suite')}, has_ss={bool(self.bad_job.get('ss'))}") - return 'good' + return ('good', 'uncertain', 'non_build_task_guard') # Extract build metadata from stats build_stage = stats.get('build_stage.max', 0) @@ -2554,7 +2768,7 @@ class GitBisect: # Even if error_id not in stats, check if similar errors exist in log if self._find_error_evidence_in_log(error_id, result_root): self.logger.info(f"Found error evidence in build log for '{error_id}'. Result: bad") - return 'bad' + return ('bad', '100%', 'error_evidence_in_log') # Strategy 2: Enhanced build progress detection # Don't just rely on build_stage number - check actual compilation progress @@ -2572,12 +2786,12 @@ class GitBisect: # If build didn't even start (stage < 20), we can't trust the result if build_stage < 20: self.logger.warning(f"Build stage {build_stage} < 20, build didn't start. Result: skip") - return 'skip' + return ('skip', 'uncertain', 'build_not_started') # If build completed successfully (reached packaging), missing error means good if build_stage >= 30: self.logger.info(f"Build stage {build_stage} >= 30, reached packaging phase. Missing error means good") - return 'good' + return ('good', '100%', 'build_completed_no_errid') # Build stage 20 just means "build started", not "build made progress" # We need to check actual build progress by looking at the log content: @@ -2587,12 +2801,12 @@ class GitBisect: if phases['link_started']: self.logger.info(f"Build stage {build_stage}, linking/archiving started (AR commands found). " f"Build made significant progress, missing error means good") - return 'good' + return ('good', 'uncertain', 'build_progress_link_started') if phases['many_files_compiled']: self.logger.info(f"Build stage {build_stage}, many files compiled successfully. " f"Build made significant progress, missing error means good") - return 'good' + return ('good', 'uncertain', 'build_progress_many_files') # If build started but no clear progress indicators self.logger.info(f"Build stage {build_stage}, but no clear progress indicators (no AR, few CC commands). " @@ -2606,10 +2820,10 @@ class GitBisect: time_ratio = float(build_time) / float(reference_time) if time_ratio < 0.3: # Build completed less than 30% of normal time self.logger.warning(f"Build time {build_time}s is only {time_ratio:.1%} of reference {reference_time}s. Result: skip") - return 'skip' + return ('skip', 'uncertain', 'build_time_too_short') elif time_ratio > 0.7: # Build completed more than 70% of normal time self.logger.info(f"Build time {build_time}s is {time_ratio:.1%} of reference, sufficient progress. Result: good") - return 'good' + return ('good', 'uncertain', 'build_time_sufficient') # Strategy 4: Final fallback - default to BAD for missing error in failed build # This should be rare now that we check build progress indicators @@ -2617,7 +2831,7 @@ class GitBisect: self.logger.warning(f"Build stage {build_stage}, but no clear progress indicators found. " f"No evidence for error '{error_id}', defaulting to BAD for missing error in failed build") self.logger.warning(f"This is a rare case - consider manual verification") - return 'bad' + return ('bad', 'uncertain', 'fallback_missing_error_in_failed_build') def _find_error_evidence_in_log(self, error_id: str, result_root: str) -> bool: """ @@ -3688,7 +3902,7 @@ class GitBisect: try: # 1. 获取 parent commit - parent_commit = self._get_parent_commit_hash(first_bad_commit) + parent_commit = self._get_parent_commit(first_bad_commit) if not parent_commit: self.logger.error("Failed to get parent commit") result['status'] = 'failed' @@ -3699,7 +3913,7 @@ class GitBisect: self.logger.info(f"parent_commit: {parent_commit[:12]}") # 2. 获取 HEAD commit (如果需要,用于提交验证作业) - head_commit = self._get_head_commit_hash() + head_commit = self._get_current_head_commit() if head_commit: self.logger.info(f"HEAD_commit: {head_commit[:12]}") else: @@ -3746,28 +3960,46 @@ class GitBisect: self.logger.info(f"Introduced {len(introduced_errids)} error IDs") - # 5.5. Git-based verification (仅限构建任务) - if self.is_build_task and introduced_errids: - self.logger.info("Performing git-based file modification verification (build task only)...") - self.logger.info(f"Note: Only checking current error_id to avoid large logs ({len(introduced_errids)} total introduced)") - git_verification = self._verify_errids_with_git( - parent_commit, - first_bad_commit, - introduced_errids - ) - result['git_verification'] = git_verification - self.logger.info(f"Git verification: verified={git_verification['verified']}, " - f"confidence={git_verification['confidence']}, " - f"need_human_judgment={git_verification.get('need_human_judgment', False)}") + # 5.5. 两步验证逻辑 + # 步骤1: 验证 self.error_id 是否在 introduced_errids 中 + error_id_verification = self._verify_error_id_in_introduced(introduced_errids) + result['error_id_verification'] = error_id_verification + + if not error_id_verification['found']: + # error_id 未在 introduced_errids 中找到,验证失败 + self.logger.warning(f"Target error_id '{self.error_id}' NOT found in introduced_errids") + self.logger.warning(f"This means the bisect result may be inaccurate") + # 直接标记验证失败,不需要 git_verification + result['verification_passed'] = False + result['verification_confidence'] = 0.0 + result['verification_reason'] = 'target_error_id_not_in_introduced' else: - if not self.is_build_task: - self.logger.info("Skipping git verification (not a build task)") - result['git_verification'] = { - 'verified': True, - 'confidence': 1.0, - 'reason': 'not_applicable', - 'need_human_judgment': False - } + # error_id 在 introduced_errids 中找到 + self.logger.info(f"Target error_id verification PASSED: '{self.error_id}' found in introduced_errids") + + # 步骤2: Git-based verification (仅限构建任务) + if self.is_build_task: + self.logger.info("Performing git-based file modification verification (build task only)...") + self.logger.info(f"Note: Only checking current error_id to avoid large logs ({len(introduced_errids)} total introduced)") + git_verification = self._verify_errids_with_git( + parent_commit, + first_bad_commit, + introduced_errids + ) + result['git_verification'] = git_verification + self.logger.info(f"Git verification: verified={git_verification['verified']}, " + f"confidence={git_verification['confidence']}, " + f"need_human_judgment={git_verification.get('need_human_judgment', False)}") + # 使用 git_verification 的结果 + result['verification_passed'] = git_verification.get('verified', True) + result['verification_confidence'] = git_verification.get('confidence', 1.0) + result['verification_reason'] = git_verification.get('reason', 'git_verification_passed') + else: + # 非构建任务:error_id 找到即验证通过,不需要 git 文件检查 + self.logger.info("Skipping git verification (not a build task, error_id found is sufficient)") + result['verification_passed'] = True + result['verification_confidence'] = 1.0 + result['verification_reason'] = 'error_id_verified_test_task' # 6. 检查 HEAD 状态 (如果提交了HEAD job) # 注意:HEAD 检测只是附加信息,不影响 bisect 验证结果 @@ -3779,19 +4011,19 @@ class GitBisect: result['head_check'] = head_result self.logger.info(f"HEAD check (informational only): {head_result.get('status', 'unknown')}") - # 7. 整合验证结果 - git_ver = result.get('git_verification', {}) - result['status'] = 'success' - result['verification_passed'] = git_ver.get('verified', True) - result['verification_confidence'] = git_ver.get('confidence', 1.0) - result['verification_reason'] = git_ver.get('reason', 'boundary_check_passed') + # 7. 根据验证结果设置最终 status + if result.get('verification_passed', False): + result['status'] = 'success' + else: + result['status'] = 'failed' + self.logger.warning(f"Boundary verification FAILED: {result.get('verification_reason', 'unknown')}") self.logger.info("=" * 80) self.logger.info("Boundary verification COMPLETED") self.logger.info(f"Introduced {len(introduced_errids)} error IDs") - self.logger.info(f"Verification: {'PASSED' if result['verification_passed'] else 'FAILED'}") - self.logger.info(f"Confidence: {result['verification_confidence']}") - if git_ver.get('need_human_judgment'): + self.logger.info(f"Verification: {'PASSED' if result.get('verification_passed') else 'FAILED'}") + self.logger.info(f"Confidence: {result.get('verification_confidence', 0)}") + if result.get('git_verification', {}).get('need_human_judgment'): self.logger.warning("HUMAN JUDGMENT RECOMMENDED") self.logger.info("=" * 80) @@ -3804,38 +4036,6 @@ class GitBisect: result['error'] = str(e) return result - def _get_parent_commit_hash(self, commit: str) -> Optional[str]: - """获取提交的父提交""" - try: - result = subprocess.run( - ['git', '-C', self.work_dir, 'rev-parse', f'{commit}^1'], - capture_output=True, - text=True, - check=True, - timeout=30 - ) - parent = result.stdout.strip() - return parent if parent else None - except Exception as e: - self.logger.error(f"Failed to get parent commit: {str(e)}") - return None - - def _get_head_commit_hash(self) -> Optional[str]: - """获取HEAD提交""" - try: - result = subprocess.run( - ['git', '-C', self.work_dir, 'rev-parse', 'HEAD'], - capture_output=True, - text=True, - check=True, - timeout=30 - ) - head = result.stdout.strip() - return head if head else None - except Exception as e: - self.logger.error(f"Failed to get HEAD commit: {str(e)}") - return None - def _submit_verification_jobs(self, parent_commit: str, head_commit: Optional[str], first_bad_commit: str) -> Dict[str, str]: """ @@ -4004,9 +4204,10 @@ class GitBisect: errids.append(stats['errid']) # 方式2: 从 stats keys 中查找 errid 模式 - # 常见模式: makepkg.eid.*, *.c:*, *.h:*, etc. + # 通用模式: *.eid.* (所有测试框架的 errid 格式) + # 以及特定的源代码文件错误模式 errid_patterns = [ - 'makepkg.eid.', # makepkg build errors + '.eid.', # 通用 errid 格式 (ltp.eid.*, makepkg.eid.*, xfstests.eid.*, etc.) '.c:', # C source file errors '.h:', # Header file errors '.cpp:', # C++ source file errors @@ -4029,6 +4230,51 @@ class GitBisect: return list(set(errids)) # 去重 + def _verify_error_id_in_introduced(self, introduced_errids: List[str]) -> Dict[str, Any]: + """ + 验证 self.error_id 是否在 introduced_errids 中 + + 这是验证 bisect 结果准确性的关键步骤: + - 如果 error_id 在 introduced_errids 中,说明 first_bad_commit 确实引入了这个错误 + - 如果 error_id 不在 introduced_errids 中,说明 bisect 结果可能不准确 + + Args: + introduced_errids: 引入的 errid 列表 + + Returns: + { + 'found': bool, # 是否找到 + 'error_id': str, # 原始的 error_id + 'introduced_count': int # introduced_errids 数量 + } + """ + result = { + 'found': False, + 'error_id': self.error_id, + 'introduced_count': len(introduced_errids) + } + + if not self.error_id: + self.logger.warning("No error_id set, skipping verification") + return result + + if not introduced_errids: + self.logger.warning("No introduced_errids to check against") + return result + + self.logger.info(f"Verifying error_id '{self.error_id}' against {len(introduced_errids)} introduced errids") + + # 精确匹配 + if self.error_id in introduced_errids: + result['found'] = True + self.logger.info(f"error_id '{self.error_id}' found in introduced_errids") + return result + + # 未找到匹配 + self.logger.warning(f"error_id '{self.error_id}' NOT found in introduced_errids") + self.logger.debug(f"Introduced errids: {introduced_errids[:10]}...") # 只打印前10个避免日志过大 + return result + def _check_head_regression(self, head_job_id: str) -> Dict: """ 检查HEAD是否存在回归(仅供参考,不影响 bisect 验证结果) @@ -4110,31 +4356,47 @@ class GitBisect: def _check_file_modified_in_commit(self, commit: str, file_path: str) -> Optional[bool]: """ 检查文件是否在指定提交中被修改 - - 使用 git diff-tree 检查单个提交,这是最精确的方法 - + + 使用 git show --stat 检查单个提交的文件修改情况 + Args: commit: 提交哈希 file_path: 文件路径 - + Returns: True 如果文件被修改,False 如果未被修改,None 如果检查失败 """ try: result = subprocess.run( - ['git', '-C', self.work_dir, 'diff-tree', - '--no-commit-id', '--name-only', '-r', commit], + ['git', '-C', self.work_dir, 'show', commit, '--stat', '--format='], capture_output=True, text=True, check=True, timeout=30 ) - modified_files = result.stdout.strip().split('\n') - is_modified = file_path in modified_files - + # git show --stat 输出格式示例: + # drivers/net/file.c | 10 +++++----- + # include/header.h | 2 +- + # 2 files changed, 6 insertions(+), 6 deletions(-) + # 需要从每行提取文件路径(在 | 之前的部分) + stat_output = result.stdout.strip() + modified_files = [] + for line in stat_output.split('\n'): + if '|' in line: + # 提取 | 之前的文件路径并去除空格 + file_part = line.split('|')[0].strip() + if file_part: + modified_files.append(file_part) + + # 检查文件路径是否匹配(支持完整路径匹配和文件名匹配) + file_basename = os.path.basename(file_path) + is_modified = (file_path in modified_files or + any(f.endswith(file_path) or f == file_basename for f in modified_files)) + self.logger.debug(f"File '{file_path}' {'was' if is_modified else 'was not'} modified in commit {commit[:8]}") + self.logger.debug(f"Modified files in commit: {modified_files}") return is_modified - + except Exception as e: self.logger.error(f"Failed to check file modification in commit: {str(e)}") return None @@ -4506,32 +4768,7 @@ class GitBisect: 构建skip结果(多候选)的数据结构 """ # Calculate job reuse rate from commit_jobs.jsonl (more reliable than instance variables) - job_request_count = 0 - job_reused_count = 0 - commit_jobs_path = os.path.join(self.temp_result_root, 'commit_jobs.jsonl') - - if os.path.exists(commit_jobs_path): - try: - with open(commit_jobs_path, 'r', encoding='utf-8') as f: - for line in f: - try: - record = json.loads(line.strip()) - job_request_count += 1 - if record.get('is_reused', False): - job_reused_count += 1 - except json.JSONDecodeError: - continue - except Exception as e: - self.logger.warning(f"Failed to calculate reuse stats from commit_jobs.jsonl: {e}") - # Fallback to instance variables if file reading fails - job_request_count = self.job_request_count - job_reused_count = self.job_reused_count - else: - # Fallback to instance variables if file doesn't exist - job_request_count = self.job_request_count - job_reused_count = self.job_reused_count - - job_reused_rate = job_reused_count / job_request_count if job_request_count > 0 else 0.0 + job_request_count, job_reused_count, job_reused_rate = self._calculate_job_reuse_stats() # 获取 commit subject(分开存储) first_bad_commit_subject = "" @@ -4590,27 +4827,25 @@ class GitBisect: normalized_bad_commit = self._normalize_commit_hash(self.bad_commit) # 调试日志:输出比较的各个值 - self.logger.debug(f"Comparing commits:") - self.logger.debug(f" first_bad_commit: '{first_bad_commit}' (len={len(first_bad_commit)})") - self.logger.debug(f" self.bad_commit: '{self.bad_commit}' (len={len(self.bad_commit)})") - self.logger.debug(f" normalized_bad_commit: '{normalized_bad_commit}' (len={len(normalized_bad_commit) if normalized_bad_commit else 0})") + self.logger.info(f"Comparing commits for is_same_commit check:") + self.logger.info(f" first_bad_commit: '{first_bad_commit}' (len={len(first_bad_commit)})") + self.logger.info(f" self.bad_commit: '{self.bad_commit}' (len={len(self.bad_commit) if self.bad_commit else 0})") + self.logger.info(f" normalized_bad_commit: '{normalized_bad_commit}' (len={len(normalized_bad_commit) if normalized_bad_commit else 0})") # 如果 first_bad_commit 就是原始的 bad_commit,直接使用 bad_job_id - # 先尝试规范化的比较,如果规范化失败则使用原始字符串比较 + # 比较时统一转为小写并 strip,避免大小写和空白字符问题 is_same_commit = False - if normalized_bad_commit and first_bad_commit == normalized_bad_commit: + fbc_clean = first_bad_commit.strip().lower() if first_bad_commit else '' + nbc_clean = normalized_bad_commit.strip().lower() if normalized_bad_commit else '' + obc_clean = self.bad_commit.strip().lower() if self.bad_commit else '' + + if nbc_clean and fbc_clean == nbc_clean: is_same_commit = True self.logger.info(f"first_bad_commit matches normalized bad_commit (resolved tag/hash)") - elif first_bad_commit == self.bad_commit: + elif fbc_clean == obc_clean: is_same_commit = True self.logger.info(f"first_bad_commit matches original bad_commit (direct string match)") - # 如果规范化的比较失败,可能是因为字符串有空格等,尝试 strip 后比较 - if not is_same_commit and normalized_bad_commit: - if first_bad_commit.strip() == normalized_bad_commit.strip(): - is_same_commit = True - self.logger.info(f"first_bad_commit matches after strip() (whitespace issue)") - if is_same_commit: first_bad_id = self.bad_job_id # 从数据库获取 result_root @@ -4624,8 +4859,15 @@ class GitBisect: else: first_bad_id, bad_result_root = self.get_id_and_result_root_by_commit(first_bad_commit) if not first_bad_id: - self.logger.error("first_bad_commit %s was not actually tested, bisect result invalid", first_bad_commit) - raise RuntimeError("Bisect result verification failed: first_bad_commit not tested") + # first_bad_commit 可能是 bisect 通过推断得出的,没有直接测试过 + # 这种情况是正常的,只是记录警告,不抛出异常 + self.logger.warning( + "first_bad_commit %s was not directly tested (bisect inferred), " + "proceeding without job_id/result_root", + first_bad_commit + ) + first_bad_id = None + bad_result_root = None # 获取 commit subject(分开存储) first_bad_commit_subject = "" @@ -4637,32 +4879,7 @@ class GitBisect: self.logger.warning(f"Failed to get subject for commit {first_bad_commit}: {e}") # Calculate job reuse rate from commit_jobs.jsonl (more reliable than instance variables) - job_request_count = 0 - job_reused_count = 0 - commit_jobs_path = os.path.join(self.temp_result_root, 'commit_jobs.jsonl') - - if os.path.exists(commit_jobs_path): - try: - with open(commit_jobs_path, 'r', encoding='utf-8') as f: - for line in f: - try: - record = json.loads(line.strip()) - job_request_count += 1 - if record.get('is_reused', False): - job_reused_count += 1 - except json.JSONDecodeError: - continue - except Exception as e: - self.logger.warning(f"Failed to calculate reuse stats from commit_jobs.jsonl: {e}") - # Fallback to instance variables if file reading fails - job_request_count = self.job_request_count - job_reused_count = self.job_reused_count - else: - # Fallback to instance variables if file doesn't exist - job_request_count = self.job_request_count - job_reused_count = self.job_reused_count - - job_reused_rate = job_reused_count / job_request_count if job_request_count > 0 else 0.0 + job_request_count, job_reused_count, job_reused_rate = self._calculate_job_reuse_stats() # Extract errid log context if this is an error bisect errid_log_context = "" @@ -4735,8 +4952,18 @@ class GitBisect: if boundary_result.get('status') == 'success': self.logger.info(f"边界验证成功 | introduced_errids: {len(boundary_result.get('introduced_errids', []))}") + # 边界验证成功,设置验证状态 + all_bisect_log['verification_status'] = 'verified' + all_bisect_log['verification_method'] = 'integrated_bisect' else: - self.logger.warning(f"边界验证失败 | reason: {boundary_result.get('error', 'unknown')}") + # 边界验证失败,设置验证失败状态 + failure_reason = boundary_result.get('verification_reason', boundary_result.get('error', 'unknown')) + self.logger.warning(f"边界验证失败 | reason: {failure_reason}") + all_bisect_log['verification_status'] = 'failed' + all_bisect_log['verification_method'] = 'integrated_bisect' + all_bisect_log['verification_failed_reason'] = failure_reason + # 设置低置信度标记 + all_bisect_log['confidence'] = 'low' except Exception as e: self.logger.error(f"边界验证异常: {str(e)}") self.logger.error(traceback.format_exc()) @@ -4744,6 +4971,8 @@ class GitBisect: 'status': 'error', 'error': str(e) } + all_bisect_log['verification_status'] = 'error' + all_bisect_log['confidence'] = 'low' # Generate and save visualization self._generate_bisect_visualization() @@ -5088,39 +5317,4 @@ class GitBisect: return valid -class JobSubmissionError(Exception): - """Custom exception for job submission failures""" - -class JobStatusError(Exception): - """Base class for job status errors""" - -class JobStatusTimeoutError(JobStatusError): - """Exception for job status polling timeouts""" - -class BisectError(Exception): - """Bisect操作的基础异常类""" - pass - -class GitOperationError(BisectError): - """Git操作相关错误""" - pass - -class CloneError(GitOperationError): - """Git克隆错误""" - def __init__(self, url, retries): - self.repo_url = url - self.retry_count = retries - super().__init__(f"Failed to clone {url} after {retries} retries") - -class CheckoutError(GitOperationError): - """Git检出错误""" - def __init__(self, commit, repo_path): - self.commit = commit - self.repo_path = repo_path - super().__init__(f"Checkout failed for {commit} in {repo_path}") - -class InvalidCommitError(GitOperationError): - """Invalid Git commit hash error""" - pass - diff --git a/programs/bisect-py/utils/batch_bisect_analysis.py b/programs/bisect-py/utils/batch_bisect_analysis.py index ffaf0524103bdae591f68806d53cfa493912fd27..b201479d1e62ececbab3799b1e1d4c3e6e3095f8 100644 --- a/programs/bisect-py/utils/batch_bisect_analysis.py +++ b/programs/bisect-py/utils/batch_bisect_analysis.py @@ -753,7 +753,7 @@ class BisectAnalysis: build_tasks, 'Build', time_window_hours, - targets={'success_rate': 0.95, 'miss_rate': 0.10, 'duplicate_rate': 0.02, 'timeliness_days': 1} + targets={'success_rate': 0.95, 'miss_rate': 0.10, 'duplicate_rate': 2.0, 'timeliness_days': 1} ) results['build'] = build_metrics @@ -763,7 +763,7 @@ class BisectAnalysis: functional_tasks, 'Functional', time_window_hours, - targets={'success_rate': 0.80, 'miss_rate': 0.20, 'duplicate_rate': 0.03, 'timeliness_days': 5} + targets={'success_rate': 0.80, 'miss_rate': 0.20, 'duplicate_rate': 2.0, 'timeliness_days': 5} ) results['functional'] = functional_metrics @@ -805,13 +805,23 @@ class BisectAnalysis: metrics['miss_details'] = miss_details metrics['miss_rate_meets_target'] = miss_rate <= targets['miss_rate'] - # 3. 重复率 (使用资源加权重复率) - # 这里的重复率指的是:未能通过复用机制(结果复用或Job复用)优化的重复任务占比 - # 目标是降低这种“无效重复”对机器资源的消耗 - duplicate_rate = self._calculate_resource_weighted_duplicate_rate(tasks) + # 3. 重复率 + # 返回值: (duplicate_rate, excess_not_reused_count, excess_duplicate_tasks) + duplicate_result = self._calculate_resource_weighted_duplicate_rate(tasks) + if isinstance(duplicate_result, tuple): + duplicate_rate, excess_not_reused_count, excess_duplicate_tasks = duplicate_result + else: + # 兼容旧版本返回值 + duplicate_rate = duplicate_result + excess_not_reused_count = 0 + excess_duplicate_tasks = [] + metrics['duplicate_rate'] = duplicate_rate metrics['duplicate_rate_percent'] = duplicate_rate * 100 - metrics['duplicate_rate_meets_target'] = duplicate_rate <= targets['duplicate_rate'] + metrics['excess_not_reused_count'] = excess_not_reused_count + metrics['excess_duplicate_tasks'] = excess_duplicate_tasks + # 达标指标:重复率百分比 <= 阈值(如 200%) + metrics['duplicate_rate_meets_target'] = metrics['duplicate_rate_percent'] <= targets['duplicate_rate'] * 100 # 4. 时效 (90%分位数,单位:天) timeliness_90_seconds, timeliness_details = self.calculate_timeliness_enhanced(tasks) @@ -821,7 +831,7 @@ class BisectAnalysis: metrics['timeliness_details'] = timeliness_details metrics['timeliness_meets_target'] = timeliness_90_days <= targets['timeliness_days'] - # 5. 总体评估 + # 5. 总体评估 (包含重复率:超额未复用数为0则达标) all_targets_met = ( metrics['success_rate_meets_target'] and metrics['miss_rate_meets_target'] and @@ -834,85 +844,151 @@ class BisectAnalysis: def _calculate_resource_weighted_duplicate_rate(self, tasks): """ - 计算资源加权重复率 (Resource Weighted Duplicate Rate) + 计算真实重复率 (True Duplicate Rate) - 定义:在所有重复任务中,那些未能有效复用已有结果(即消耗了额外机器资源)的任务占比。 + 定义:同一个 commit 被定位超过 2 次,且未复用的"额外"任务占比。 判定逻辑: - 1. 任务是重复的 (commit 出现多次,除了第一次出现的,后续都算重复)。 - 2. 任务未能有效复用资源 (Job Reuse Rate < 0.9)。 + 1. 按 first_bad_commit 分组统计每个 commit 被定位的次数 + 2. 每个 commit 第 1 次定位 → 正常(不算重复) + 3. 每个 commit 第 2 次定位 → 允许的重复(不算重复) + 4. 每个 commit 第 3 次及以上 → 检查是否复用 + - 如果已复用(is_result_reused=True 或 job_reused_rate>=0.9)→ 不算重复 + - 如果未复用 → 真正的重复(计入重复率) 公式: - 重复率 = Σ(无效复用的重复任务) / 成功的bisect任务数 + 重复率 = 未复用的超额重复任务数 / 成功的 bisect 任务数 + + 目标: + - 构建 bisect: 重复率 ≤ 2% + - 功能 bisect: 重复率 ≤ 3% :param tasks: 任务列表 - :return: 重复率 (0.0-1.0) + :return: (重复率, 超额未复用数, 超额任务详情列表) """ if not tasks: - return 0.0 + return 0.0, 0, [] # 只统计 bisect_status='success' 的任务 success_tasks = [t for t in tasks if t.get('bisect_status') == 'success'] if not success_tasks: - return 0.0 + return 0.0, 0, [] - # 1. 识别重复任务 - # 按时间排序确保先出现的任务被视为"Base",后出现的视为"Duplicate" + # 按时间排序 sorted_tasks = sorted(success_tasks, key=lambda x: x.get('submit_time', 0)) - seen_commits = set() - wasteful_duplicate_count = 0 - total_duplicate_count = 0 + # 按 commit 分组统计 + commit_tasks = {} # commit -> [task1, task2, ...] for task in sorted_tasks: commit = task.get('first_bad_commit') if not commit: continue - if commit in seen_commits: - # 这是一个重复任务 - total_duplicate_count += 1 + if commit not in commit_tasks: + commit_tasks[commit] = [] + commit_tasks[commit].append(task) + + # 统计指标 + total_duplicate_count = 0 # 总重复任务数(第 2 次及以上) + excess_duplicate_count = 0 # 超额重复任务数(第 3 次及以上) + excess_not_reused_count = 0 # 未复用的超额重复任务数(真正的浪费) + excess_duplicate_tasks = [] # 超额重复的任务详情 + + for commit, task_list in commit_tasks.items(): + count = len(task_list) + + # 第 2 次及以上都算"重复"(用于整体分析) + if count > 1: + total_duplicate_count += (count - 1) + + # 按"未复用任务"计数: + # - 第 1 个未复用 = 原始任务,OK + # - 第 2 个未复用 = 允许的重复,OK + # - 第 3 个及以上未复用 = 超额浪费 + # 所有 reused=True 的任务不计数,直接跳过 + not_reused_count = 0 # 该 commit 的未复用任务计数 + first_not_reused_task_id = None # 第一个未复用任务的 ID - # 检查资源复用情况 - # 尝试从 j 字段获取 job_reused_rate + for i, task in enumerate(task_list, start=1): j_data = task.get('j', {}) if isinstance(j_data, str): try: - import json j_data = json.loads(j_data) except: j_data = {} - if not isinstance(j_data, dict): j_data = {} - # 获取 job_reused_rate,默认为 0.0 (悲观假设,如果没有记录,视为未复用) - job_reused_rate = j_data.get('job_reused_rate', 0.0) - - # 检查是否标记为结果直接复用 (is_result_reused) + # 判断是否已复用 is_result_reused = j_data.get('is_result_reused', False) + job_reused_rate = j_data.get('job_reused_rate', 0.0) + is_reused = is_result_reused or job_reused_rate >= 0.9 - # 判定是否为"有效复用" - # 1. 显式标记为结果复用 - # 2. 或者 Job 复用率 >= 90% - is_effectively_reused = is_result_reused or (job_reused_rate >= 0.9) - - # 如果不是有效复用,则视为"资源浪费"的重复任务 - if not is_effectively_reused: - wasteful_duplicate_count += 1 - else: - # 这是该 commit 第一次出现 - seen_commits.add(commit) - - # 指标定义:占成功bisect任务数的比例 - duplicate_rate = wasteful_duplicate_count / len(success_tasks) if len(success_tasks) > 0 else 0.0 + if is_reused: + # 已复用的任务不计数,直接跳过 + continue - # 也可以返回详细信息用于调试 - logger.info(f"Duplicate Analysis: Total={len(success_tasks)} (success only), Duplicates={total_duplicate_count}, " - f"Wasteful={wasteful_duplicate_count}, Rate={duplicate_rate:.2%}") + # 未复用任务计数 + not_reused_count += 1 - return duplicate_rate + if not_reused_count == 1: + # 第 1 个未复用 = 原始任务 + first_not_reused_task_id = task.get('id') + elif not_reused_count == 2: + # 第 2 个未复用 = 允许的重复 + pass + else: + # 第 3 个及以上未复用 = 超额浪费 + excess_duplicate_count += 1 + excess_not_reused_count += 1 + excess_duplicate_tasks.append({ + 'task_id': task.get('id'), + 'bad_job_id': task.get('bad_job_id'), + 'commit': commit[:12] if len(commit) >= 12 else commit, + 'not_reused_occurrence': not_reused_count, # 第几个未复用任务 + 'total_tasks': count, # 该 commit 总共有几个任务 + 'first_not_reused_task_id': first_not_reused_task_id, # 第一个未复用任务的 ID + 'job_reused_rate': job_reused_rate, + 'is_result_reused': is_result_reused + }) + + # 重复率 = 未复用的超额重复任务数 / 成功任务总数 + duplicate_rate = excess_not_reused_count / len(success_tasks) if len(success_tasks) > 0 else 0.0 + + # 日志输出 - 整体分析 + unique_commits = len(commit_tasks) + logger.info(f"Duplicate Analysis: Total={len(success_tasks)} (success only), " + f"UniqueCommits={unique_commits}, " + f"TotalDuplicates={total_duplicate_count} (2nd+ task), " + f"ExcessNotReused={excess_not_reused_count} (3rd+ not-reused), " + f"Rate={duplicate_rate:.2%}") + + # 输出未复用的超额重复任务详情(真正需要关注的) + if excess_duplicate_tasks: + logger.info(f"Excess not-reused tasks (3rd+ not-reused per commit):") + for et in excess_duplicate_tasks: + logger.info(f" - bisect_id: {et['task_id']}, " + f"bad_job_id: {et['bad_job_id']}, " + f"commit: {et['commit']}, " + f"not_reused_nth: {et['not_reused_occurrence']}, " + f"total_tasks: {et['total_tasks']}, " + f"first_not_reused_id: {et['first_not_reused_task_id']}, " + f"job_reused_rate: {et['job_reused_rate']:.2f}") + + # 输出 commit 重复分布(用于整体分析) + commits_with_duplicates = [(c, len(t)) for c, t in commit_tasks.items() if len(t) > 1] + if commits_with_duplicates: + commits_with_duplicates.sort(key=lambda x: x[1], reverse=True) + logger.info(f"Commit task distribution (for reference):") + for commit, count in commits_with_duplicates[:10]: # 只显示前 10 个 + commit_short = commit[:12] if len(commit) >= 12 else commit + logger.info(f" - {commit_short}: {count} tasks") + if len(commits_with_duplicates) > 10: + logger.info(f" ... and {len(commits_with_duplicates) - 10} more commits with multiple tasks") + + return duplicate_rate, excess_not_reused_count, excess_duplicate_tasks def _calculate_single_category_duplicate_rate(self, tasks): """ @@ -1304,16 +1380,6 @@ class BisectAnalysis: f" └─ Failed: {miss_details.get('unsuccessful_bisect_count', 0)} ({miss_details.get('miss_rate_unsuccessful_bisect', 0)*100:.1f}%)", f" Bisect not started: {miss_details.get('bisect_not_started_count', 0)} (wait/processing)", ]) - - # Add duplicate rate by category - output.append("\nDuplicate Rate by Category:") - duplicate_by_category = analysis.get('duplicate_by_category', {}) - if duplicate_by_category: - for category, stats in duplicate_by_category.items(): - rate = stats.get('duplicate_rate', 0) * 100 - output.append(f" {category}: {rate:.1f}% ({stats.get('total_duplicate_tasks', 0)} duplicate tasks out of {stats.get('total_tasks', 0)})") - else: - output.append(" No category data available") output.extend([ "", @@ -1417,7 +1483,14 @@ class BisectAnalysis: for commit, count in sorted_commits: # 使用 change_point 进行显示 - display = commit_to_change_point.get(commit, commit[:12]) + if commit in commit_to_change_point: + display = commit_to_change_point[commit] + elif len(commit) == 40: + # 完整 SHA,显示前 12 位 + display = commit[:12] + else: + # 短 SHA(历史数据),原样显示并标记 + display = f"{commit} (short-sha)" output.append(f" {display}: {count} times") # NEW: Category Metrics Report @@ -1464,18 +1537,18 @@ class BisectAnalysis: mr_status = "✓ PASS" if metrics['miss_rate_meets_target'] else "✗ FAIL" output.append(f" - Miss Rate: {mr:.2f}% (target: ≤{mr_target:.0f}%) {mr_status}") - # Duplicate Rate - dr = metrics['duplicate_rate_percent'] - dr_target = targets['duplicate_rate'] * 100 - dr_status = "✓ PASS" if metrics['duplicate_rate_meets_target'] else "✗ FAIL" - output.append(f" - Duplicate Rate: {dr:.2f}% (target: ≤{dr_target:.0f}%) {dr_status}") - # Timeliness tl = metrics['timeliness_90_days'] tl_target = targets['timeliness_days'] tl_status = "✓ PASS" if metrics['timeliness_meets_target'] else "✗ FAIL" output.append(f" - Timeliness (90%): {tl:.2f} days (target: ≤{tl_target} day(s)) {tl_status}") + # Duplicate Rate (百分比) + dr = metrics['duplicate_rate_percent'] + dr_target = targets['duplicate_rate'] * 100 + dr_status = "✓ PASS" if metrics['duplicate_rate_meets_target'] else "✗ FAIL" + output.append(f" - Duplicate Rate: {dr:.2f}% (target: ≤{dr_target:.0f}%) {dr_status}") + output.append("") # Overall Assessment @@ -1487,10 +1560,10 @@ class BisectAnalysis: failed_metrics.append("Success Rate") if not metrics['miss_rate_meets_target']: failed_metrics.append("Miss Rate") - if not metrics['duplicate_rate_meets_target']: - failed_metrics.append("Duplicate Rate") if not metrics['timeliness_meets_target']: failed_metrics.append("Timeliness") + if not metrics['duplicate_rate_meets_target']: + failed_metrics.append("Duplicate Rate") output.append(f"Overall Assessment: ✗ FAILED ({', '.join(failed_metrics)})") output.append("") @@ -1878,11 +1951,7 @@ def main(): analysis['timeliness_90'] = timeliness_90 analysis['timeliness_details'] = timeliness_details - # Duplicate rate by category - duplicate_by_category = analyzer.calculate_duplicate_rate_by_category(tasks) - analysis['duplicate_by_category'] = duplicate_by_category - - # NEW: Calculate metrics by category (Build vs Functional) + # Calculate metrics by category (Build vs Functional) category_metrics = analyzer.calculate_metrics_by_category(tasks, time_window_hours=args.hours) analysis['category_metrics'] = category_metrics else: @@ -1892,7 +1961,6 @@ def main(): analysis['success_details'] = {} analysis['timeliness_90'] = 0 analysis['timeliness_details'] = {} - analysis['duplicate_by_category'] = {} analysis['category_metrics'] = {} # Format output