obsolete.computer

download-video-subs/fetch-feeds.sh

File Type: text/x-shellscript


#!/bin/bash

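# This script reads a list of feed URLs from a text file (one URL per line;
# blank lines and lines starting with '#' are ignored), fetches the titles,
# links and publication dates of up to 5 items from each RSS or Atom feed
# (items whose link contains "/shorts/" are skipped), and generates an HTML
# document of links in which items from the last 7 days are marked NEW.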

# Usage: ./fetch-feeds.sh [rssfeeds.txt rssfeeds.html]
# - If no arguments provided, defaults to "rssfeeds.txt" as input and "rssfeeds.html" as output in the script's directory.
# - rssfeeds.txt: Input file with RSS feed URLs (one per line)
# - rssfeeds.html: Output HTML file (will be overwritten)

# Requirements:
# - curl: For fetching RSS feeds
# - xmlstarlet: For parsing XML (install via apt/yum/brew if needed: e.g., sudo apt install xmlstarlet)
# - GNU date: For date formatting (may require gdate on macOS)

# Resolve the input feed list and the output HTML path from the command line.
# Exactly zero or exactly two arguments are accepted; anything else prints the
# usage text and exits with status 1.
case $# in
    0)
        # No arguments: use the default file names next to this script.
        SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
        INPUT_FILE="${SCRIPT_DIR}/rssfeeds.txt"
        OUTPUT_FILE="${SCRIPT_DIR}/rssfeeds.html"
        ;;
    2)
        # Explicit paths: first is the feed list, second is the HTML output.
        INPUT_FILE=$1
        OUTPUT_FILE=$2
        ;;
    *)
        printf '%s\n' \
            "Usage: $0 [input_feeds_file output_html_file]" \
            "If no arguments, defaults to rssfeeds.txt and rssfeeds.html in the script's directory"
        exit 1
        ;;
esac

# Snapshot "now" once, as seconds since the epoch; item ages are later
# measured against this single value.
current_epoch="$(date '+%s')"

# Begin the report: create (or truncate) the output file and write the page
# head, the inline stylesheet and the top-level heading. The delimiter is
# quoted because the markup is static and needs no shell expansion.
cat > "$OUTPUT_FILE" <<'HTML_HEAD'
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>RSS Feed Recent Items</title>
    <style>
        body { font-family: Arial, sans-serif; margin: 20px; }
        h1 { text-align: center; }
        h2 { color: #333; }
        ul { list-style-type: none; padding: 0; }
        li { margin-bottom: 10px; }
        a { text-decoration: none; color: #007bff; }
        a:hover { text-decoration: underline; }
        .new a { color: orangered; }
    </style>
</head>
<body>
    <h1>Recent Items from RSS Feeds</h1>
HTML_HEAD

# Process each feed
#
# The RSS and Atom code paths share the helpers below; only the XPath
# expressions differ between the two formats.

FEED_MAX_ITEMS=5          # maximum number of items listed per feed
NEW_ITEM_MAX_AGE=604800   # items at most this many seconds (7 days) old are flagged NEW
ATOM_NS="http://www.w3.org/2005/Atom"

#######################################
# Print the value selected by an XPath expression from an XML document.
# The prefix "a" is bound to the Atom namespace for every query; it is simply
# unused by the RSS expressions.
# Arguments: $1 - XML document text
#            $2 - XPath expression
# Outputs:   the selected value on stdout (nothing if there is no match or
#            the document cannot be parsed; xmlstarlet errors are discarded)
#######################################
xml_value() {
    printf '%s\n' "$1" \
        | xmlstarlet sel -N a="$ATOM_NS" -t -v "$2" 2>/dev/null
}

#######################################
# Print one <li> element for a feed item.
# Globals:   current_epoch (read; falls back to the current time if unset),
#            NEW_ITEM_MAX_AGE (read)
# Arguments: $1 - item title
#            $2 - item link (used as href)
#            $3 - item date as found in the feed; may be empty
# Outputs:   a single <li> line on stdout
#######################################
format_item() {
    local title=$1 link=$2 raw_date=$3
    local now shown_date="" item_epoch="" new_tag="" li_class=""

    now=${current_epoch:-}
    if [[ -z "$now" ]]; then
        now=$(date +%s)
    fi

    # Only consult `date` when the item actually carries a date: GNU date
    # parses an empty string as "today at 00:00", which would flag every
    # undated item as NEW.
    if [[ -n "$raw_date" ]]; then
        item_epoch=$(date -d "$raw_date" +%s 2>/dev/null)
        if [[ -n "$item_epoch" ]]; then
            shown_date=$(date -d "$raw_date" +"%Y-%m-%d" 2>/dev/null)
            if (( now - item_epoch <= NEW_ITEM_MAX_AGE )); then
                new_tag=" NEW"
                li_class=' class="new"'
            fi
        fi
        # Fallback to the original text if parsing fails
        if [[ -z "$shown_date" ]]; then
            shown_date=$raw_date
        fi
    fi

    if [[ -n "$shown_date" ]]; then
        printf '<li%s><a href="%s" target="_blank">%s</a> (%s%s)</li>\n' \
            "$li_class" "$link" "$title" "$shown_date" "$new_tag"
    else
        printf '<li%s><a href="%s" target="_blank">%s</a>%s</li>\n' \
            "$li_class" "$link" "$title" "$new_tag"
    fi
}

#######################################
# Print the "<h2>…</h2><ul>…</ul>" section for one feed.
# Items are read in document order until FEED_MAX_ITEMS have been printed or
# an item without a title is reached (treated as the end of the list). Items
# whose link contains "/shorts/" are skipped and do not count.
# Globals:   FEED_MAX_ITEMS (read)
# Arguments: $1 - XML document text
#            $2 - heading text for the section
#            $3 - XPath selecting the item elements (e.g. //item)
#            $4 - title path relative to an item
#            $5 - link path relative to an item
#            $6 - date path relative to an item
#            $7 - optional second date path, tried when $6 yields nothing
# Outputs:   HTML on stdout; nothing at all if no item qualifies
# Returns:   0 if at least one item was printed, 1 otherwise
#######################################
emit_feed_items() {
    local content=$1 heading=$2 item_path=$3
    local title_rel=$4 link_rel=$5 date_rel=$6 date_rel_alt=${7:-}
    local shown=0 idx=1 title link pub

    while (( shown < FEED_MAX_ITEMS )); do
        title=$(xml_value "$content" "${item_path}[$idx]/${title_rel}")
        if [[ -z "$title" ]]; then
            break
        fi
        link=$(xml_value "$content" "${item_path}[$idx]/${link_rel}")
        pub=$(xml_value "$content" "${item_path}[$idx]/${date_rel}")
        if [[ -z "$pub" && -n "$date_rel_alt" ]]; then
            pub=$(xml_value "$content" "${item_path}[$idx]/${date_rel_alt}")
        fi

        idx=$((idx + 1))

        if [[ "$link" == *"/shorts/"* ]]; then
            continue  # Skip shorts
        fi

        # Open the section lazily so feeds without usable items leave no
        # empty heading behind.
        if (( shown == 0 )); then
            printf '<h2>%s</h2>\n<ul>\n' "$heading"
        fi

        format_item "$title" "$link" "$pub"
        shown=$((shown + 1))
    done

    if (( shown == 0 )); then
        return 1
    fi
    printf '</ul>\n'
}

# `|| [[ -n "$feed" ]]` keeps the final line when the list file does not end
# with a newline.
while IFS= read -r feed || [[ -n "$feed" ]]; do
    feed=${feed%$'\r'}                          # tolerate CRLF line endings
    if [[ -z "$feed" ]]; then continue; fi      # Skip empty lines
    if [[ "$feed" == \#* ]]; then continue; fi  # Skip lines starting with #

    echo "Processing feed: $feed"

    # Fetch the feed content once
    FEED_CONTENT=$(curl -s "$feed")

    # Feed title: RSS channel title, else Atom feed title, else the URL itself
    TITLE=$(xml_value "$FEED_CONTENT" "//channel/title")
    if [[ -z "$TITLE" ]]; then
        TITLE=$(xml_value "$FEED_CONTENT" "//a:feed/a:title")
    fi
    if [[ -z "$TITLE" ]]; then
        TITLE="$feed"
    fi

    # Try as RSS first
    if emit_feed_items "$FEED_CONTENT" "$TITLE" \
        "//item" "title" "link" "pubDate" >> "$OUTPUT_FILE"; then
        continue
    fi

    # Try as Atom if no RSS items. A link without a rel attribute counts as
    # "alternate", and <updated> stands in when <published> is absent.
    if emit_feed_items "$FEED_CONTENT" "$TITLE" \
        "//a:entry" "a:title" \
        "a:link[@rel='alternate' or not(@rel)][1]/@href" \
        "a:published" "a:updated" >> "$OUTPUT_FILE"; then
        continue
    fi

    echo "No items found or parse error for $feed" >&2
done < "$INPUT_FILE"

# Close HTML document
cat << EOF >> "$OUTPUT_FILE"
</body>
</html>
EOF

echo "HTML document generated: $OUTPUT_FILE"

#######################################
# Copy a file into a directory, but only if that directory already exists.
# A missing directory is not an error: the copy is optional, so the function
# (and therefore the script, when this is its last command) still succeeds.
# Arguments: $1 - file to copy
#            $2 - destination directory
# Returns:   0 if the directory is absent; otherwise the exit status of cp
#######################################
copy_if_dir_exists() {
    local src=$1 dest_dir=$2
    if [[ -d "$dest_dir" ]]; then
        cp -- "$src" "$dest_dir/"
    fi
}

copy_if_dir_exists "$OUTPUT_FILE" "$HOME/Videos/Internet-Shows"

Meta