#!/bin/bash
# example script for downloading Jiutian mock catalogs.
# this version is modified from a script generated by deepseek.
# ask your favorite AI if you want to further optimize the script, or parallelize the task.
# Jiaxin Han, 2025/09/24

# ============= modify this part to specify which data to download ============
# Catalog type. Options: "hbt", "gaea", "lightcone/deep", "lightcone/ultradeep"
CatType="hbt"
# Simulation name. Options: "M1G", "M2G"
Simu="M2G"
# Output directory (created below if missing). Modify as needed.
DownloadDir="$CatType/$Simu"
#==============================================================================

# Remote location of the selected catalog and the name of its checksum list.
JiutianURL="https://jiutian.sjtu.edu.cn"
BASEURL="$JiutianURL/download/primary/gaea-hbt/$CatType/$Simu"
CHECKSUM_FILE="checksums.sha256"

# Quote the path so a directory name containing spaces is not word-split, and
# stop early if it cannot be created: every later step writes into it.
if ! mkdir -p -- "$DownloadDir"; then
    echo "Error: cannot create download directory: $DownloadDir" >&2
    exit 1
fi

# Download the checksum file using IPv4 in quiet mode to $DownloadDir. This file contains the list of data files, and their sha256 checksum for data verification.
# Remove the -4 option if IPv6 network also works for you.
echo "Downloading checksum file..."
if ! wget -4 -nv -O "$DownloadDir/$CHECKSUM_FILE" "$BASEURL/$CHECKSUM_FILE"; then
    echo "Error: failed to download $BASEURL/$CHECKSUM_FILE" >&2
    # wget -O creates/truncates the output file even when the transfer fails;
    # remove it so an empty list is not mistaken for "nothing to download".
    rm -f -- "$DownloadDir/$CHECKSUM_FILE"
    exit 1
fi

# Verify checksums and count results
ok_count=0
fail_count=0
skip_count=0

# Return 0 if the SHA-256 digest of file $1 equals the expected hex digest $2,
# non-zero otherwise (including when the file cannot be read).
checksum_matches() {
    local actual
    actual=$(sha256sum -- "$1" | awk '{print $1}')
    [[ -n "$actual" && "$actual" == "$2" ]]
}

# Read and process each line from the CHECKSUM_FILE. Lines are expected in
# sha256sum format: "<checksum>  <filename>". The `|| [[ -n "$line" ]]` part
# makes sure a final line without a trailing newline is still processed.
while read -r line || [[ -n "$line" ]]; do
    # Skip empty lines and comments
    [[ -z "$line" || "$line" =~ ^# ]] && continue

    # Extract checksum and filename: the first field is the checksum and the
    # rest of the line is the filename, so names containing spaces survive.
    read -r checksum filename <<< "$line"
    filename=${filename#./}

    # Skip if we couldn't parse the line
    if [[ -z "$checksum" || -z "$filename" ]]; then
        echo "? Skipping unparsable line: $line"
        skip_count=$((skip_count + 1))
        continue
    fi

    # Files are written to their exact relative path below, so refuse entries
    # with a ".." component that would climb out of the download directory.
    case "/$filename/" in
        */../*)
            echo "? Skipping unsafe path: $filename"
            skip_count=$((skip_count + 1))
            continue
            ;;
    esac

    target="$DownloadDir/$filename"

    # Check if file already exists
    if [[ -f "$target" ]]; then
        echo "File exists: $filename - verifying checksum..."

        if checksum_matches "$target" "$checksum"; then
            echo "✓ Checksum OK, skipping download: $filename"
            # Counted both as verified and as skipped (see summary wording).
            ok_count=$((ok_count + 1))
            skip_count=$((skip_count + 1))
            continue
        fi

        echo "✗ Checksum mismatch, re-downloading: $filename"
        # Remove the corrupted file
        rm -f -- "$target"
    fi

    echo "Downloading: $filename"

    # Download the file to its exact relative path. `wget -P` would keep only
    # the basename of the URL, so an entry with a sub-directory would be saved
    # in the wrong place and the verification below could never find it.
    if ! mkdir -p -- "$(dirname -- "$target")" ||
       ! wget -4 -nv -O "$target" "$BASEURL/$filename"; then
        echo "✗ Download FAIL: $filename"
        # wget -O leaves an empty or partial file behind on failure.
        rm -f -- "$target"
        fail_count=$((fail_count + 1))
        continue
    fi

    # Verify checksum
    if checksum_matches "$target" "$checksum"; then
        echo "✓ Checksum OK: $filename"
        ok_count=$((ok_count + 1))
    else
        echo "✗ Checksum FAIL: $filename"
        fail_count=$((fail_count + 1))
    fi

done < "$DownloadDir/$CHECKSUM_FILE"

# Print summary
# Feed the file to wc on stdin so it prints only the line count (given a file
# argument it also prints the file name, which would end up inside the
# message), and quote the path so directories containing spaces work.
total_lines=$(wc -l < "$DownloadDir/$CHECKSUM_FILE")
echo ""
echo "=== SUMMARY ==="
echo "$total_lines lines processed in checksum file"
echo "✓ Verified/Downloaded: $ok_count"
echo "? Skipped: $skip_count (includes existing files with good checksums and unparsable lines)"
echo "✗ FAILED: $fail_count"
echo "==============="
