ci: Add script & CI to check dead links #28
Workflow file for this run
name: Check Dead Links
on:
  pull_request:
    types: [opened, synchronize, reopened]
jobs:
  check-links:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
      - name: Verify curl installation
        run: curl --version
      - name: Extract and clean URLs from all documentation
        id: extract_urls
        run: |
          # Match http(s) URLs, stopping at whitespace and common trailing
          # punctuation/delimiters.
          REGEX='https?://[^\s)"'"'"'<`:,]+'
          # -h suppresses filename prefixes: without it, grep over multiple
          # files emits "path:URL" lines that would poison the URL list.
          find . \( -name "*.md" -o -name "*.html" -o -name "*.txt" \) -type f -print0 | \
            xargs -0 grep -hoP "$REGEX" > urls.txt || true
          sort -u urls.txt -o urls.txt
          echo "Total URLs found: $(wc -l < urls.txt)"
          # exclude_patterns.txt (optional) holds one extended-regex pattern
          # per line; the patterns are OR-joined into a single expression.
          # -s (not -f) guards against an empty file, whose empty regex
          # would otherwise filter out every URL. "|| true" keeps the step
          # alive when grep selects no lines (exit status 1).
          if [ -s exclude_patterns.txt ]; then
            EXCLUDE_REGEX=$(paste -sd'|' exclude_patterns.txt)
            grep -vE "$EXCLUDE_REGEX" urls.txt > filtered_urls.txt || true
          else
            echo "exclude_patterns.txt not found or empty. No URLs will be excluded."
            cp urls.txt filtered_urls.txt
          fi
          echo "Total URLs after exclusion: $(wc -l < filtered_urls.txt)"
          # Strip any trailing punctuation that slipped past the regex.
          sed -E 's/[">,)]+$//' filtered_urls.txt > cleaned_urls.txt
          echo "Total URLs after cleaning: $(wc -l < cleaned_urls.txt)"
          mv cleaned_urls.txt filtered_urls.txt
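      # A hypothetical exclude_patterns.txt, for illustration only (this file
      # is not part of the PR; the step above only assumes one extended-regex
      # pattern per line):
      #   ^https?://localhost
      #   ^https?://127\.0\.0\.1
      #   ^https://internal\.example\.com/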
      - name: Print URLs to be checked
        run: |
          echo "===== URLs to be checked ====="
          cat filtered_urls.txt
          echo "=============================="
      - name: Check if URLs were found
        id: check_urls
        run: |
          # "exit 0" only ends the current step, not the job, so the result
          # is exported as a step output and consumed by the `if:` below.
          if [ -s filtered_urls.txt ]; then
            echo "has_urls=true" >> "$GITHUB_OUTPUT"
          else
            echo "has_urls=false" >> "$GITHUB_OUTPUT"
            echo "No URLs found to check after applying exclusions."
          fi
      - name: Check URLs using curl
        if: steps.check_urls.outputs.has_urls == 'true'
        shell: bash
        run: |
          set +e
          TOTAL=0
          FAILED=0
          DEAD_LINKS=()
          while IFS= read -r url; do
            TOTAL=$((TOTAL + 1))
            echo "[$TOTAL] Checking URL: $url"
            # On transport-level failures (DNS, TLS, timeout) curl still
            # prints "000" via -w, so a fallback "|| echo 000" would append
            # a second line to the variable; "|| true" avoids that.
            # --max-time bounds the whole transfer, since --connect-timeout
            # alone does not stop a stalled download.
            HTTP_STATUS=$(curl -o /dev/null -s -w "%{http_code}" -L --connect-timeout 10 --max-time 30 "$url" || true)
            HTTP_STATUS=${HTTP_STATUS:-000}
            if [[ "$HTTP_STATUS" -ge 400 || "$HTTP_STATUS" -eq 000 ]]; then
              echo "❌ Dead link found: $url (HTTP status: $HTTP_STATUS)"
              DEAD_LINKS+=("$url")
              FAILED=$((FAILED + 1))
            else
              echo "✅ Link is valid: $url (HTTP status: $HTTP_STATUS)"
            fi
          done < filtered_urls.txt
echo "Total links checked: $TOTAL" | |
echo "Dead links found: $FAILED" | |
if [ "$FAILED" -ne 0 ]; then | |
echo "::error::Found $FAILED dead links." | |
for dead in "${DEAD_LINKS[@]}"; do | |
echo "::error::Dead link: $dead" | |
done | |
printf "**Found %d dead links:**\n" "$FAILED" > dead_links.md | |
for dead in "${DEAD_LINKS[@]}"; do | |
printf "- %s\n" "$dead" >> dead_links.md | |
done | |
cat dead_links.md | |
exit 1 | |
else | |
echo "All $TOTAL links are valid." | |
fi |
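      # A possible follow-up, not part of this workflow: dead_links.md is
      # currently only printed to the log. A sketch of a step that posts it
      # back to the PR as a comment, assuming the GitHub CLI is on the runner
      # and the job has `pull-requests: write` permission:
      #   - name: Comment dead links on PR
      #     if: failure()
      #     env:
      #       GH_TOKEN: ${{ github.token }}
      #     run: |
      #       if [ -f dead_links.md ]; then
      #         gh pr comment "${{ github.event.pull_request.number }}" --body-file dead_links.md
      #       fi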