feat: enhanced token probe with diagnostics - tests Azure and GitHub endpoints, logs token source

2025-12-03 06:50:39 -07:00 · 2025-12-03 06:50:39 -07:00 · b50675d667
parent 40d64c8b69
commit b50675d667
1 changed files with 72 additions and 61 deletions
--- a/.github/workflows/llm-hello-world.yaml
+++ b/.github/workflows/llm-hello-world.yaml
@ -155,76 +155,87 @@ jobs:
    steps:
      - name: Probe token limits
        env:
-          GITHUB_TOKEN: ${{ secrets.COPILOT_PAT || secrets.GITHUB_TOKEN }}
+          AUTH_TOKEN: ${{ secrets.COPILOT_PAT }}
+          FALLBACK_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          # Legacy Azure endpoint (Gemini's suggestion): deprecated Oct 2025, but probed in case its limits differ
+          LLM_ENDPOINT_AZURE: "https://models.inference.ai.azure.com/chat/completions"
+          LLM_ENDPOINT_GITHUB: "https://models.github.ai/inference/chat/completions"
        run: |
          echo "=== Token Limit Probe ==="
-          echo "Testing progressively larger payloads to find the ceiling"
-          echo ""

-          # Test sizes in characters (roughly 4 chars per token)
-          SIZES=(2000 4000 8000 16000 32000 64000)
+          # Diagnostic: prefer COPILOT_PAT, else GITHUB_TOKEN; only the PAT's length is logged, never its value
+          if [ -z "$AUTH_TOKEN" ]; then
+            echo "Using: GITHUB_TOKEN (fallback)"
+            TOKEN="$FALLBACK_TOKEN"
+          else
+            echo "Using: COPILOT_PAT (length: ${#AUTH_TOKEN} chars)"
+            TOKEN="$AUTH_TOKEN"
+          fi

-          # Generate filler text (lorem ipsum style)
-          generate_filler() {
-            local size=$1
-            python3 -c "print(('The quick brown fox jumps over the lazy dog. ' * ($size // 45 + 1))[:$size])"
-          }
-
-          echo "| Size (chars) | ~Tokens | HTTP Status | Result |"
-          echo "|--------------|---------|-------------|--------|"
-
-          for SIZE in "${SIZES[@]}"; do
-            APPROX_TOKENS=$((SIZE / 4))
-
-            # Generate payload of specified size
-            FILLER=$(generate_filler $SIZE)
-
-            PROMPT="Respond with exactly one word: 'received'. Here is padding text to test token limits: $FILLER"
-            PROMPT_ESCAPED=$(echo "$PROMPT" | jq -Rs .)
-
-            # Call the LLM
-            RESPONSE=$(curl -s -w "\n%{http_code}" -X POST "$LLM_ENDPOINT" \
-              -H "Authorization: Bearer $GITHUB_TOKEN" \
-              -H "Content-Type: application/json" \
-              -d "{
-                \"model\": \"$LLM_MODEL\",
-                \"messages\": [
-                  {\"role\": \"user\", \"content\": $PROMPT_ESCAPED}
-                ],
-                \"max_tokens\": 10
-              }" 2>/dev/null)
-
-            HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
-            BODY=$(echo "$RESPONSE" | sed '$d')
-
-            if [ "$HTTP_CODE" = "200" ]; then
-              RESULT="OK"
+          # Probe each endpoint in turn with progressively larger prompts and
+          # report the largest payload it accepted. Every request is sent with
+          # $TOKEN as the bearer credential and stops at the first non-200.
+          # NOTE(review): the GitHub Models docs appear to use publisher/name
+          # model IDs (e.g. openai/gpt-4o) - confirm that the bare "gpt-4o"
+          # is accepted by the GITHUB endpoint.
+          MODEL="gpt-4o"
+          for ENDPOINT_NAME in "AZURE" "GITHUB"; do
+            if [ "$ENDPOINT_NAME" = "AZURE" ]; then
+              ENDPOINT="$LLM_ENDPOINT_AZURE"
            else
-              # Extract error message if present
-              ERROR_MSG=$(echo "$BODY" | jq -r '.error.message // .message // "Unknown error"' 2>/dev/null | head -c 50)
-              RESULT="FAIL: $ERROR_MSG"
-
-              # Log full error for debugging
-              echo ""
-              echo "::warning::Failed at ${SIZE} chars (~${APPROX_TOKENS} tokens)"
-              echo "Full error response:"
-              echo "$BODY" | jq . 2>/dev/null || echo "$BODY"
-              echo ""
+              ENDPOINT="$LLM_ENDPOINT_GITHUB"
            fi

-            echo "| $SIZE | ~$APPROX_TOKENS | $HTTP_CODE | $RESULT |"
+            echo ""
+            echo "=== Testing $ENDPOINT_NAME endpoint ==="
+            echo "URL: $ENDPOINT"
+            echo "Model: $MODEL"
+            echo ""

-            # Stop probing after first failure to avoid hammering the API
-            if [ "$HTTP_CODE" != "200" ]; then
-              echo ""
-              echo "=== Ceiling Found ==="
-              echo "Last successful size: $((SIZE / 2)) chars (~$((SIZE / 8)) tokens)"
-              echo "First failure at: $SIZE chars (~$APPROX_TOKENS tokens)"
-              break
-            fi
+            # Test sizes in characters (roughly 4 chars per token)
+            SIZES=(2000 4000 8000 16000 32000 64000 128000)
+            # Largest size this endpoint has accepted so far; 0 = none yet.
+            # Reset per endpoint so one endpoint's result never leaks into the next.
+            LAST_OK_SIZE=0

-            # Small delay to avoid rate limiting
-            sleep 1
+            echo "| Size (chars) | ~Tokens | HTTP Status | Result |"
+            echo "|--------------|---------|-------------|--------|"
+
+            for SIZE in "${SIZES[@]}"; do
+              APPROX_TOKENS=$((SIZE / 4))
+
+              # Generate filler: a repeated sentence truncated to exactly $SIZE chars
+              FILLER=$(python3 -c "print(('The quick brown fox jumps over the lazy dog. ' * ($SIZE // 45 + 1))[:$SIZE])")
+
+              PROMPT="Respond with exactly one word: 'received'. Here is padding text to test token limits: $FILLER"
+
+              # Let jq build the whole request body (so the prompt is always
+              # escaped correctly) and stream it to curl on stdin. That keeps the
+              # payload off curl's argv, where Linux caps a single argument at
+              # 128 KiB - a limit the largest probe size comes close to.
+              # curl appends the HTTP status on its own final line via -w.
+              RESPONSE=$(echo "$PROMPT" \
+                | jq -c -Rs --arg model "$MODEL" \
+                    '{model: $model, messages: [{role: "user", content: .}], max_tokens: 10}' \
+                | curl -s -w "\n%{http_code}" -X POST "$ENDPOINT" \
+                    -H "Authorization: Bearer $TOKEN" \
+                    -H "Content-Type: application/json" \
+                    --data-binary @- 2>/dev/null)
+
+              # Last line is the status code; everything before it is the body
+              HTTP_CODE=$(echo "$RESPONSE" | tail -n1)
+              BODY=$(echo "$RESPONSE" | sed '$d')
+
+              if [ "$HTTP_CODE" = "200" ]; then
+                RESULT="OK"
+                LAST_OK_SIZE=$SIZE
+              else
+                ERROR_MSG=$(echo "$BODY" | jq -r '.error.message // .message // "Unknown error"' 2>/dev/null | head -c 50)
+                # jq prints nothing when the body is empty or not the expected
+                # JSON (e.g. an HTML error page), so fall back explicitly
+                RESULT="FAIL: ${ERROR_MSG:-Unknown error}"
+                echo ""
+                echo "::warning::Failed at ${SIZE} chars (~${APPROX_TOKENS} tokens) on $ENDPOINT_NAME"
+                echo "Full error: $BODY"
+                echo ""
+              fi
+
+              echo "| $SIZE | ~$APPROX_TOKENS | $HTTP_CODE | $RESULT |"
+
+              # Stop at the first failure; only claim a ceiling if at least one
+              # request succeeded, otherwise the failure is not size-related
+              if [ "$HTTP_CODE" != "200" ]; then
+                echo ""
+                if [ "$LAST_OK_SIZE" -gt 0 ]; then
+                  echo "=== $ENDPOINT_NAME Ceiling: ~$((LAST_OK_SIZE / 4)) tokens ==="
+                else
+                  echo "=== $ENDPOINT_NAME: no request succeeded (HTTP $HTTP_CODE); ceiling unknown ==="
+                fi
+                break
+              fi
+
+              sleep 1
+            done
+
+            # Leaving the loop with a 200 means every size was accepted
+            if [ "$HTTP_CODE" = "200" ]; then
+              echo ""
+              echo "=== $ENDPOINT_NAME: all sizes accepted; ceiling is above ~$((LAST_OK_SIZE / 4)) tokens ==="
+            fi
          done

          echo ""