feat: Enhance deployment workflow and improve stats processing with session management and artifact preparation

This commit is contained in:
zhangyuheng
2026-03-10 10:31:51 +08:00
parent 56230e8b22
commit b094a10f06
2 changed files with 111 additions and 64 deletions

View File

@@ -38,10 +38,20 @@ jobs:
STATS_PASS: ${{ secrets.STATS_PASS }} STATS_PASS: ${{ secrets.STATS_PASS }}
run: python statsprocess.py run: python statsprocess.py
- name: Prepare Pages artifact
run: |
mkdir -p _site
rsync -a --delete \
--exclude '.git/' \
--exclude '.github/' \
--exclude 'README.md' \
--exclude 'statsprocess.py' \
./ ./_site/
- name: Upload Pages artifact - name: Upload Pages artifact
uses: actions/upload-pages-artifact@v3 uses: actions/upload-pages-artifact@v3
with: with:
path: . path: ./_site
deploy: deploy:
needs: build needs: build

View File

@@ -1,32 +1,50 @@
import os import os
import json import json
import threading
import requests import requests
from requests.adapters import HTTPAdapter from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry from urllib3.util.retry import Retry
import re import re
import time from concurrent.futures import ThreadPoolExecutor, as_completed
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime from datetime import datetime
from tqdm import tqdm # Add tqdm for progress bars from tqdm import tqdm # Add tqdm for progress bars
BASE_URL = "http://x2.sjcmc.cn:15960/stats/"
STATS_DIR = "stats" STATS_DIR = "stats"
MAX_WORKERS = max(4, min(16, int(os.environ.get("STATS_MAX_WORKERS", (os.cpu_count() or 4) * 2))))
# HTTP Basic Auth for BASE_URL (from environment variables) # HTTP Basic Auth for BASE_URL (from environment variables)
BASE_URL = os.environ.get("STATS_BASE_URL", "")
STATS_USER = os.environ.get("STATS_USER", "") STATS_USER = os.environ.get("STATS_USER", "")
STATS_PASS = os.environ.get("STATS_PASS", "") STATS_PASS = os.environ.get("STATS_PASS", "")
BASE_AUTH = (STATS_USER, STATS_PASS) if STATS_USER else None BASE_AUTH = (STATS_USER, STATS_PASS) if STATS_USER else None
# Create a session that bypasses system proxy and retries on failure
session = requests.Session()
session.trust_env = False # Ignore HTTP_PROXY / HTTPS_PROXY env vars
retry_strategy = Retry( retry_strategy = Retry(
total=3, total=3,
backoff_factor=1, backoff_factor=1,
status_forcelist=[429, 500, 502, 503, 504], status_forcelist=[429, 500, 502, 503, 504],
) )
session.mount("http://", HTTPAdapter(max_retries=retry_strategy)) thread_local = threading.local()
session.mount("https://", HTTPAdapter(max_retries=retry_strategy))
def create_session():
    """Build a fresh requests Session for one worker thread.

    The session ignores HTTP(S)_PROXY environment variables, retries
    transient failures per the module-level ``retry_strategy``, and sizes
    its connection pool to match the thread pool so workers never block
    waiting for a free connection.
    """
    sess = requests.Session()
    sess.trust_env = False  # Ignore HTTP_PROXY / HTTPS_PROXY env vars
    adapter = HTTPAdapter(
        max_retries=retry_strategy,
        pool_connections=MAX_WORKERS,
        pool_maxsize=MAX_WORKERS,
    )
    # Same adapter serves both schemes.
    for scheme in ("http://", "https://"):
        sess.mount(scheme, adapter)
    return sess
def get_session():
    """Return the calling thread's cached Session, creating it on first use.

    Sessions are stored in ``thread_local`` so each worker thread reuses a
    single connection pool instead of opening new connections per request.
    """
    if getattr(thread_local, "session", None) is None:
        thread_local.session = create_session()
    return thread_local.session
if BASE_AUTH: if BASE_AUTH:
print(f"Using authentication for BASE_URL (user: {STATS_USER})") print(f"Using authentication for BASE_URL (user: {STATS_USER})")
@@ -37,47 +55,87 @@ else:
os.makedirs(STATS_DIR, exist_ok=True) os.makedirs(STATS_DIR, exist_ok=True)
print("Fetching file list...") print("Fetching file list...")
fetch_failed = False
try: try:
response = session.get(BASE_URL, timeout=10, auth=BASE_AUTH) response = get_session().get(BASE_URL, timeout=10, auth=BASE_AUTH)
response.raise_for_status() response.raise_for_status()
content = response.text content = response.text
# Regex for UUID.json # Regex for UUID.json
files = re.findall(r'href="([0-9a-f-]{36}\.json)"', content) files = sorted(set(re.findall(r'href="([0-9a-f-]{36}\.json)"', content)))
files = list(set(files))
print(f"Found {len(files)} player stats files.") print(f"Found {len(files)} player stats files.")
except Exception as e: except Exception as e:
print(f"Error fetching file list: {e}") print(f"Error fetching file list: {e}")
files = [] files = []
fetch_failed = True
def load_name_cache():
    """Load previously resolved player names from stats/summary.json.

    Returns a ``{uuid: name}`` dict so already-known names skip the Mojang
    API lookups. Entries whose name is missing or "Unknown" are dropped.
    Returns an empty dict when the summary file is absent or unreadable.
    """
    summary_path = os.path.join(STATS_DIR, 'summary.json')
    if not os.path.exists(summary_path):
        return {}
    try:
        with open(summary_path, 'r', encoding='utf-8') as f:
            summary = json.load(f)
    except Exception:
        # Corrupt or unreadable summary — fall back to fresh lookups.
        return {}
    cache = {}
    for player in summary.get('players', []):
        uuid = player.get('uuid')
        name = player.get('name')
        if uuid and name and name != "Unknown":
            cache[uuid] = name
    return cache
def get_player_name(uuid):
    """Resolve a player UUID to a username.

    Tries the Ashcon API first, then Mojang's session server; each attempt
    is best-effort (network errors are swallowed). Returns "Unknown" when
    neither service yields a name.
    """
    lookups = (
        # (endpoint, JSON key holding the name)
        (f"https://api.ashcon.app/mojang/v2/user/{uuid}", 'username'),
        (f"https://sessionserver.mojang.com/session/minecraft/profile/{uuid}", 'name'),
    )
    for url, key in lookups:
        try:
            r = get_session().get(url, timeout=5)
            if r.status_code == 200:
                return r.json().get(key)
        except Exception:
            pass
    return "Unknown"
def process_player(filename):
def format_dist(cm):
    """Format a distance given in centimetres as "X.X m" or "X.XX km"."""
    metres = cm / 100
    # Switch to kilometres once the distance reaches 1000 m.
    return f"{metres:.1f} m" if metres < 1000 else f"{metres / 1000:.2f} km"
def format_time(ticks):
    """Format a play time given in Minecraft ticks (20 ticks = 1 second).

    Picks the largest fitting unit: 秒 (seconds), 分钟 (minutes),
    小时 (hours), or 天 (days), always with three decimal places.

    Fix: the seconds and days branches previously returned bare numbers
    while minutes/hours carried their unit labels; all branches now
    include the unit for consistent display.
    """
    seconds = ticks / 20
    if seconds < 60:
        return f"{seconds:.3f} 秒"
    minutes = seconds / 60
    if minutes < 60:
        return f"{minutes:.3f} 分钟"
    hours = minutes / 60
    if hours < 24:
        return f"{hours:.3f} 小时"
    days = hours / 24
    return f"{days:.3f} 天"
def process_player(filename, name_cache):
uuid = filename.replace(".json", "") uuid = filename.replace(".json", "")
json_path = os.path.join(STATS_DIR, filename) json_path = os.path.join(STATS_DIR, filename)
# 1. Download/Load JSON # 1. Download/Load JSON
data = None data = None
try: try:
# Check if we already have it locally and it's valid, maybe skip download? r = get_session().get(BASE_URL + filename, timeout=10, auth=BASE_AUTH)
# User implies fetching updates, so we download.
r = session.get(BASE_URL + filename, timeout=10, auth=BASE_AUTH)
if r.status_code == 200: if r.status_code == 200:
data = r.json() data = r.json()
else: else:
@@ -91,26 +149,10 @@ def process_player(filename):
return None return None
# 2. Get Name # 2. Get Name
# We can check if name is already in the processing file to avoid API calls if scraping repeatedly? player_name = name_cache.get(uuid, "Unknown")
# For this task, we assume we need to fetch it.
# To save API calls, we could check if we have a saved version with a name.
player_name = "Unknown"
# Check if 'extra' exists in downloaded data (unlikely if strictly from server)
# But checking if we have a local cache of this file with a name is smart
if os.path.exists(json_path):
try:
with open(json_path, 'r', encoding='utf-8') as f:
local_data = json.load(f)
if 'extra' in local_data and local_data['extra'].get('player_name') != "Unknown":
player_name = local_data['extra']['player_name']
except:
pass
if player_name == "Unknown": if player_name == "Unknown":
player_name = get_player_name(uuid) player_name = get_player_name(uuid)
# Sleep slightly to be nice to APIs if meaningful massive parallel
time.sleep(0.1)
# 3. Download Avatar - SKIPPED to avoid rate limits # 3. Download Avatar - SKIPPED to avoid rate limits
# The frontend will handle dynamic loading of avatars using Minotar/Crafatar URLs. # The frontend will handle dynamic loading of avatars using Minotar/Crafatar URLs.
@@ -126,31 +168,11 @@ def process_player(filename):
custom = stats.get('minecraft:custom', {}) custom = stats.get('minecraft:custom', {})
walk_cm = custom.get('minecraft:walk_one_cm', 0) walk_cm = custom.get('minecraft:walk_one_cm', 0)
def format_dist(cm):
m = cm / 100
if m < 1000:
return f"{m:.1f} m"
else:
return f"{m/1000:.2f} km"
walk_fmt = format_dist(walk_cm) walk_fmt = format_dist(walk_cm)
# Play Time (1 tick = 1/20 second) # Play Time (1 tick = 1/20 second)
play_time_ticks = custom.get('minecraft:play_time', 0) play_time_ticks = custom.get('minecraft:play_time', 0)
def format_time(ticks):
seconds = ticks / 20
if seconds < 60:
return f"{seconds:.3f}"
minutes = seconds / 60
if minutes < 60:
return f"{minutes:.3f} 分钟"
hours = minutes / 60
if hours < 24:
return f"{hours:.3f} 小时"
days = hours / 24
return f"{days:.3f}"
play_time_fmt = format_time(play_time_ticks) play_time_fmt = format_time(play_time_ticks)
# Mined # Mined
@@ -202,14 +224,29 @@ def process_player(filename):
} }
} }
# Process sequentially with progress bar
name_cache = load_name_cache()
results = [] results = []
if files: if files:
for filename in tqdm(files, desc="Processing players"): with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
result = process_player(filename) future_map = {
executor.submit(process_player, filename, name_cache): filename
for filename in files
}
for future in tqdm(as_completed(future_map), total=len(future_map), desc="Processing players"):
try:
result = future.result()
except Exception as e:
print(f"Worker failed for {future_map[future]}: {e}")
continue
if result is not None: if result is not None:
results.append(result) results.append(result)
if fetch_failed:
print("Skipping summary update because file list fetch failed.")
raise SystemExit(1)
# Sort by name perhaps? Or just raw list. # Sort by name perhaps? Or just raw list.
results.sort(key=lambda x: x['name']) results.sort(key=lambda x: x['name'])