download-video-subs/fetch-feeds.sh
File Type: text/x-shellscript
#!/bin/bash
# This script reads a list of RSS feed URLs from a text file (one URL per line),
# takes the first 5 items of each feed (URL, title and date; links containing "/shorts/" are skipped),
# and generates a cleanly presented HTML document with links.
# Usage: ./script.sh [rssfeeds.txt rssfeeds.html]
# - If no arguments provided, defaults to "rssfeeds.txt" as input and "rssfeeds.html" as output in the script's directory.
# - rssfeeds.txt: Input file with RSS feed URLs (one per line)
# - rssfeeds.html: Output HTML file (will be overwritten)
# Requirements:
# - curl: For fetching RSS feeds
# - xmlstarlet: For parsing XML (install via apt/yum/brew if needed: e.g., sudo apt install xmlstarlet)
# - GNU date: For date parsing and formatting via `date -d` (on macOS GNU date is usually installed as gdate and must be reachable under the name `date`)
# Resolve the feed list (INPUT_FILE) and the HTML target (OUTPUT_FILE).
#   no arguments  -> rssfeeds.txt / rssfeeds.html next to this script
#   two arguments -> $1 is the feed list, $2 is the HTML file to write
#   anything else -> print usage on stderr and exit with status 1
case $# in
  0)
    # cd's stdout is discarded: when CDPATH is set, cd echoes the directory it
    # entered, and that line would otherwise be captured together with pwd's
    # output. "--" protects against a script path that starts with a dash.
    SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" > /dev/null && pwd)"
    INPUT_FILE="$SCRIPT_DIR/rssfeeds.txt"
    OUTPUT_FILE="$SCRIPT_DIR/rssfeeds.html"
    ;;
  2)
    INPUT_FILE="$1"
    OUTPUT_FILE="$2"
    ;;
  *)
    # Usage text is a diagnostic, so it belongs on stderr.
    echo "Usage: $0 [input_feeds_file output_html_file]" >&2
    echo "If no arguments, defaults to rssfeeds.txt and rssfeeds.html in the script's directory" >&2
    exit 1
    ;;
esac
# Timestamp taken once at start-up; item ages are measured against it later.
current_epoch="$(date '+%s')"

# Opening part of the HTML page, one array element per output line.
html_head=(
  '<!DOCTYPE html>'
  '<html lang="en">'
  '<head>'
  '<meta charset="UTF-8">'
  '<meta name="viewport" content="width=device-width, initial-scale=1.0">'
  '<title>RSS Feed Recent Items</title>'
  '<style>'
  'body { font-family: Arial, sans-serif; margin: 20px; }'
  'h1 { text-align: center; }'
  'h2 { color: #333; }'
  'ul { list-style-type: none; padding: 0; }'
  'li { margin-bottom: 10px; }'
  'a { text-decoration: none; color: #007bff; }'
  'a:hover { text-decoration: underline; }'
  '.new a { color: orangered; }'
  '</style>'
  '</head>'
  '<body>'
  '<h1>Recent Items from RSS Feeds</h1>'
)

# Truncate (or create) the output file and write the prologue into it.
printf '%s\n' "${html_head[@]}" > "$OUTPUT_FILE"
# Process each feed listed in INPUT_FILE: fetch it once, then append a heading
# and up to max_items entries (RSS first, Atom as fallback) to OUTPUT_FILE.

max_items=5            # entries listed per feed
new_window_secs=604800 # entries at most 7 days old are flagged NEW
atom_ns="a=http://www.w3.org/2005/Atom"
# Reuse the timestamp taken at script start; take one now if none was set.
: "${current_epoch:=$(date +%s)}"

#######################################
# Escape text for use in HTML element content and quoted attribute values.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout
#######################################
html_escape() {
  # "&" must be replaced first so the entities produced below stay intact.
  printf '%s' "$1" | sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g' \
    -e 's/"/\&quot;/g' -e "s/'/\&#39;/g"
}

#######################################
# Print the text value of an XPath expression evaluated on the current feed.
# Globals:   FEED_CONTENT (read)
# Arguments: $1 - XPath expression
#            $2.. - extra xmlstarlet global options (e.g. -N prefix=uri)
# Outputs:   the selected text on stdout (empty when nothing matches)
#######################################
feed_value() {
  local xpath=$1
  shift
  # -T forces plain-text output, so values arrive unescaped and are escaped
  # exactly once by html_escape. Parse errors are expected for non-XML
  # responses and are silenced on purpose; an empty result signals them.
  printf '%s\n' "$FEED_CONTENT" | xmlstarlet sel -T "$@" -t -v "$xpath" 2> /dev/null
}

#######################################
# Print one <li> line for a feed entry.
# Globals:   current_epoch, new_window_secs (read)
# Arguments: $1 - entry title
#            $2 - entry URL
#            $3 - publication date as found in the feed (may be empty)
# Outputs:   a single HTML <li> line on stdout
#######################################
render_item() {
  local title=$1 link=$2 raw_date=$3
  local stamp="" item_epoch="" shown_date="" new_tag="" li_class=""

  # Only parse a date that is actually present: GNU date reads an empty
  # string as "today, midnight", which used to flag every undated entry NEW.
  if [[ -n "$raw_date" ]]; then
    # One call yields both the epoch (for the age test) and the display form.
    if stamp=$(date -d "$raw_date" +'%s %Y-%m-%d' 2> /dev/null); then
      item_epoch=${stamp%% *}
      shown_date=${stamp#* }
    else
      shown_date=$raw_date # fall back to the original text if parsing fails
    fi
  fi

  if [[ -n "$item_epoch" ]] && ((current_epoch - item_epoch <= new_window_secs)); then
    new_tag=" NEW"
    li_class=' class="new"'
  fi

  printf '<li%s><a href="%s" target="_blank">%s</a>' \
    "$li_class" "$(html_escape "$link")" "$(html_escape "$title")"
  if [[ -n "$shown_date" ]]; then
    printf ' (%s%s)' "$(html_escape "$shown_date")" "$new_tag"
  else
    printf '%s' "$new_tag"
  fi
  printf '</li>\n'
}

#######################################
# Append the heading and up to max_items entries of one feed flavour.
# Globals:   TITLE, OUTPUT_FILE, max_items (read); FEED_CONTENT via feed_value
# Arguments: $1 - XPath selecting the entries, without index (e.g. //item)
#            $2 - title XPath relative to an entry
#            $3 - link XPath relative to an entry
#            $4 - date XPath relative to an entry
#            $5.. - extra xmlstarlet options (namespace bindings)
# Returns:   0 if at least one entry was written, 1 otherwise
#######################################
emit_feed_items() {
  local entries=$1 title_rel=$2 link_rel=$3 date_rel=$4
  shift 4
  local -a ns_args=("$@")
  local written=0 idx=1 item_title item_link item_date

  while ((written < max_items)); do
    item_title=$(feed_value "${entries}[${idx}]/${title_rel}" "${ns_args[@]}")
    # An empty title marks the end of the entries (or a feed of another kind).
    [[ -n "$item_title" ]] || break
    item_link=$(feed_value "${entries}[${idx}]/${link_rel}" "${ns_args[@]}")
    item_date=$(feed_value "${entries}[${idx}]/${date_rel}" "${ns_args[@]}")
    idx=$((idx + 1))

    # Shorts are skipped and do not count towards max_items.
    [[ "$item_link" == *"/shorts/"* ]] && continue

    # The heading is written lazily so feeds without usable entries leave
    # no empty section behind.
    if ((written == 0)); then
      printf '<h2>%s</h2>\n<ul>\n' "$(html_escape "$TITLE")" >> "$OUTPUT_FILE"
    fi
    render_item "$item_title" "$item_link" "$item_date" >> "$OUTPUT_FILE"
    written=$((written + 1))
  done

  if ((written > 0)); then
    printf '</ul>\n' >> "$OUTPUT_FILE"
    return 0
  fi
  return 1
}

if [[ -r "${INPUT_FILE:-}" ]]; then
  # "|| [[ -n ... ]]" keeps a final line that lacks a trailing newline.
  while IFS= read -r feed || [[ -n "$feed" ]]; do
    feed=${feed%$'\r'} # tolerate CRLF line endings
    # Skip empty lines and lines starting with #
    [[ -z "$feed" || "$feed" == \#* ]] && continue
    printf 'Processing feed: %s\n' "$feed"

    # Fetch the feed content once. -f turns HTTP errors into a failure,
    # -L follows redirects, -sS stays quiet except for real errors.
    if ! FEED_CONTENT=$(curl -fsSL "$feed"); then
      echo "Failed to fetch $feed" >&2
      continue
    fi

    # Feed title: RSS channel title, then Atom feed title, then the URL.
    TITLE=$(feed_value "//channel/title")
    [[ -n "$TITLE" ]] || TITLE=$(feed_value "//a:feed/a:title" -N "$atom_ns")
    [[ -n "$TITLE" ]] || TITLE=$feed

    # Try as RSS first.
    emit_feed_items "//item" "title" "link" "pubDate" && continue

    # Try as Atom if no RSS items were written. A link without a rel
    # attribute counts as rel="alternate" (RFC 4287); only the first
    # matching link is used.
    emit_feed_items "//a:entry" "a:title" \
      "a:link[@rel='alternate' or not(@rel)][1]/@href" "a:published" \
      -N "$atom_ns" && continue

    echo "No items found or parse error for $feed" >&2
  done < "$INPUT_FILE"
else
  echo "Cannot read feed list: ${INPUT_FILE:-<unset>}" >&2
fi
# Close HTML document
cat >> "$OUTPUT_FILE" << 'EOF'
</body>
</html>
EOF
echo "HTML document generated: $OUTPUT_FILE"

# Drop a copy into the shows directory when that directory exists.
# An explicit `if` is used instead of `[[ ... ]] && cp`: as the last command,
# the short-circuit form made the script finish with status 1 whenever the
# directory was absent, although nothing had gone wrong.
shows_dir="$HOME/Videos/Internet-Shows"
if [[ -d "$shows_dir" ]]; then
  cp -- "$OUTPUT_FILE" "$shows_dir/"
fi