#!/usr/bin/env bash

# Abort on the first failing command, and make a pipeline fail when any of its
# stages fails rather than only the last one. Without pipefail, a failed `gh`
# call piped into `jq` leaves jq with empty input; jq then exits 0 and the
# script would report success while printing nothing.
set -e -o pipefail

# At least the PR number is required. Usage goes to stderr so that stdout
# carries nothing but the JSON payload.
if [[ $# -lt 1 ]]; then
    cat >&2 <<'EOF'
Usage: get-pr-comments PR_NUMBER [OWNER/REPO]
Example: get-pr-comments 123
Example: get-pr-comments 123 EveryInc/cora
EOF
    exit 1
fi

PR_NUMBER=$1

# The PR number is later handed to `gh api -F`, which applies type conversion
# and treats a leading "@" as "read the value from a file". Accept plain
# digits only so nothing but an integer ever reaches that flag.
if [[ ! "$PR_NUMBER" =~ ^[0-9]+$ ]]; then
    echo "Error: PR_NUMBER must be a number, got '$PR_NUMBER'." >&2
    exit 1
fi

repo_slug=${2:-}
if [[ -n "$repo_slug" ]]; then
    # Explicit OWNER/REPO. Require a non-empty part on each side of a slash:
    # splitting a slash-less value would otherwise produce the same string for
    # both owner and repo.
    if [[ "$repo_slug" != ?*/?* ]]; then
        echo "Error: Invalid repository '$repo_slug'. Expected OWNER/REPO." >&2
        exit 1
    fi
else
    # Auto-detect from the current directory with a single gh call. gh's own
    # error output is suppressed and a failure is tolerated here so that the
    # check below prints the actionable message, instead of `set -e` aborting
    # the script with no explanation.
    repo_slug=$(gh repo view --json nameWithOwner -q .nameWithOwner 2>/dev/null) || repo_slug=""
fi

# Owner and repo are the first two slash-separated fields; anything after a
# second slash is ignored.
OWNER=${repo_slug%%/*}
repo_rest=${repo_slug#*/}
REPO=${repo_rest%%/*}

if [[ -z "$OWNER" || -z "$REPO" ]]; then
    echo "Error: Could not detect repository. Pass OWNER/REPO as second argument." >&2
    exit 1
fi

# Fetch review threads, regular PR comments, and review bodies in one query.
# Output is a JSON object with four keys:
#   review_threads   - unresolved inline code review threads (outdated threads are included, with isOutdated set)
#   pr_comments      - top-level PR conversation comments (excludes PR author and CI/status bots)
#   review_bodies    - review submissions with non-empty body text (excludes PR author and CI/status bots)
#   cross_invocation - cross-invocation awareness envelope:
#     signal: true when both resolved and unresolved threads exist (multi-round review)
#     resolved_threads: last 10 resolved threads by recency, for cluster analysis input
#
# Bot filtering: only CI/status bots (codecov, etc.) are filtered at the source.
# Their output is structurally never actionable -- coverage numbers, build
# summaries, deploy status -- and that holds regardless of format changes.
# AI review bots (coderabbitai, codex, gemini, copilot) are NOT filtered here.
# Historically their top-level comments were assumed to always be wrappers, but
# that turned out to be wrong: Codex sometimes posts actionable findings as
# top-level PR comments with no inline thread counterpart. Any source-level
# heuristic to separate wrapper from actionable for these bots is brittle (one
# bot format change away from silently dropping feedback). SKILL.md step 2
# has a content-aware actionability check and Silent Drop rule that handles
# wrappers correctly, so we trust that layer instead. Add new logins to the CI
# list only if their output is structurally non-actionable like codecov's.
#
# Page limits: the query below requests only the first 50 review threads (10
# comments each), the first 100 PR comments, and the first 50 reviews, with no
# pagination; anything beyond those limits is not returned.
#
# jq runs with -e so that empty input (gh produced no output) yields a
# non-zero exit status instead of a silent success with no JSON.
gh api graphql -f owner="$OWNER" -f repo="$REPO" -F pr="$PR_NUMBER" -f query='
query FetchPRFeedback($owner: String!, $repo: String!, $pr: Int!) {
  repository(owner: $owner, name: $repo) {
    pullRequest(number: $pr) {
      author { login }
      reviewThreads(first: 50) {
        edges {
          node {
            id
            isResolved
            isOutdated
            path
            line
            originalLine
            startLine
            originalStartLine
            comments(first: 10) {
              nodes {
                id
                author { login }
                body
                createdAt
                url
              }
            }
          }
        }
      }
      comments(first: 100) {
        nodes {
          id
          author { login }
          body
        }
      }
      reviews(first: 50) {
        nodes {
          id
          author { login }
          body
          state
        }
      }
    }
  }
}' | jq -e '
  # True when the comment or review was written by someone other than the PR
  # author and its login is not one of the CI/status bot logins in $skip.
  def from_other_party($pr_author; $skip):
    .author.login as $login
    | $login != $pr_author and ($skip | index($login) | not);

  # True when .body holds something other than whitespace (null counts as blank).
  def has_text: (.body // "") | test("^\\s*$") | not;

  # Stop with a clear message when the repository or PR did not resolve,
  # rather than failing later while iterating over null.
  (.data.repository.pullRequest
    // error("pull request not found or not accessible")) as $pr |

  # Structurally non-actionable bot output; always dropped.
  ["codecov"] as $ci_bot_logins |

  # Unresolved threads. `isOutdated` means the diff hunk around the comment
  # has shifted since the thread was opened -- not that the reviewer concern
  # was addressed. Resolution state is the only authoritative signal; outdated
  # threads are still surfaced (with their isOutdated flag intact) so the
  # resolver can factor in that the referenced line may have moved.
  [$pr.reviewThreads.edges[] | select(.node.isResolved == false)] as $unresolved |

  # Resolved threads for cross-invocation awareness: summarize each one, order
  # by the timestamp of its latest comment, keep the 10 most recent, and list
  # the newest first.
  ( [$pr.reviewThreads.edges[]
      | select(.node.isResolved == true)
      | .node
      | { thread_id: .id, path: .path, line: .line,
          first_comment_body: .comments.nodes[0].body,
          last_comment_at: ([.comments.nodes[].createdAt] | sort | last) }]
    | sort_by(.last_comment_at) | .[-10:] | reverse ) as $resolved |

  {
    review_threads: $unresolved,
    pr_comments: [$pr.comments.nodes[]
      | select(from_other_party($pr.author.login; $ci_bot_logins))
      | select(has_text)],
    # Same blank-body rule as pr_comments, so whitespace-only review bodies
    # are dropped too.
    review_bodies: [$pr.reviews.nodes[]
      | select(has_text)
      | select(from_other_party($pr.author.login; $ci_bot_logins))],
    cross_invocation: {
      # Both resolved and unresolved threads present: a multi-round review.
      signal: (($resolved | length) > 0 and ($unresolved | length) > 0),
      resolved_threads: $resolved
    }
  }'
