download-video-subs/download-video-subs
File Type: text/x-shellscript
#!/bin/bash
#This script will download one video for each channel in a list. See below for settings.
#After downloading $MAXDOWNLOADS videos, m3u8 playlists as well as folders full of symlinks are generated
#for all of the videos downloaded on that day. This script is meant to be run once per day
#(i.e. in the middle of the night). Playlists older than $CLEANUPDATE are cleaned up.
#You can pass "--skip-downloads" if you just want to regenerate the playlists without downloading anything,
#and/or "--skip-playlists" to skip building the per-day playlists.
#Your urls.txt file should take the following format... one URL for each line (begin comment lines with a #):
#
#URL[;Title Filter][;Number of playlist items to check][;Format Override]
#Absolute path of the directory containing this script. The yt-dlp binary,
#the URL list and the download archive are all looked up relative to it.
SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
#YTDL=/usr/local/bin/yt-dlp
YTDL="$SCRIPTDIR/yt-dlp"
#Nothing below can work without the downloader, so stop here -- and say why,
#instead of exiting silently.
if [[ ! -x "$YTDL" ]]; then
  echo "yt-dlp not found or not executable: $YTDL" >&2
  exit 1
fi
#Option defaults: download videos, build playlists, group downloads by playlist.
SKIPDOWNLOADS=false
SKIPPLAYLISTS=false
DOWNLOADSBYDATE=false
#Walk the command line, consuming one positional parameter per iteration.
#Each flag has a positive and a negative form; the last one given wins.
#Unknown options and stray arguments are only reported, never fatal.
until [[ $# -eq 0 ]]; do
  case "$1" in
    --skip-downloads)        SKIPDOWNLOADS=true ;;
    --do-downloads)          SKIPDOWNLOADS=false ;;
    --skip-playlists)        SKIPPLAYLISTS=true ;;
    --do-playlists)          SKIPPLAYLISTS=false ;;
    --downloads-by-date)     DOWNLOADSBYDATE=true ;;
    --downloads-by-playlist) DOWNLOADSBYDATE=false ;;
    --*)                     echo "bad option $1" ;;
    *)                       echo "argument $1" ;;
  esac
  shift
done
#--- Settings -----------------------------------------------------------------
#File with URLS to check
URLSFILE="$SCRIPTDIR/urls-sean.txt"
#Archive file which keeps track of already downloaded videos
ARCHIVEFILE="$SCRIPTDIR/archive-$HOSTNAME.txt"
#Temp file, used to keep track of total downloads.
#Created with mktemp so the name is not predictable (a fixed /tmp/<name>.<pid>
#path can be pre-created or symlinked by another local user), and removed by
#the EXIT trap so it does not leak when the script bails out early.
TEMPFILE="$(mktemp "/tmp/$(basename -- "$0").XXXXXX")" || {
  echo "Could not create temp file in /tmp" >&2
  exit 1
}
trap 'rm -f -- "$TEMPFILE"' EXIT
#Where to store regular downloads
SHOWSFOLDER="$HOME/Videos/Internet-Shows"
#where to put generated playlists and symlinks
PLAYLISTFOLDER="$SHOWSFOLDER"
#Don't download videos older than this
FIRSTDATE="$(date --date='-1 month' +%Y%m%d)"
#Don't download anything published after this date
LASTDATE="$(date +%Y%m%d)"
#Don't keep videos longer than this
CLEANUPDATE="$(date --date='-2 weeks' +%Y%m%d)"
#Filename template (see youtube-dl docs).
#By playlist: <playlist>/<upload date>-<title>.<ext>
#By date:     <today>/<playlist>-<title>.<ext>
if [[ $DOWNLOADSBYDATE = false ]]; then
  FILETEMPLATE="%(playlist)s/%(upload_date)s-%(title)s.%(ext)s"
else
  FILETEMPLATE="$LASTDATE/%(playlist)s-%(title)s.%(ext)s"
fi
#Downloads per channel per script execution.
URLDOWNLOADS=1
#Total downloads per script execution.
MAXDOWNLOADS=15
#Check back this many videos in the playlist (can be overridden in the urls.txt file)
PLAYLISTEND=5
#see youtube-dl docs for valid format strings
FORMAT="best[height<=720]/best[height<=1080]"
#don't download currently live videos
FILTER="!is_live"
#Skip videos larger than this (for reference: 850M = roughly four hours)
MAXFILESIZE=1024M
#Root URL For serving via HTTP
SHOWSROOTURL="http://$(hostname)/internet-shows"
#File types to clean up (extended-regex alternations matched against the end of the path)
DOWNLOADCLEANUPFILETYPES="(\.mp4|\.m4v|\.webm|\.part|\.md|\.jpeg|\.jpg|\.ytdl|\.vtt)"
PLAYLISTCLEANUPFILETYPES="(\.m3u|\.m3u8)"
#Start all the downloadin'
#For each entry in $URLSFILE, run yt-dlp once (at most $URLDOWNLOADS new videos
#per entry) and stop early once $MAXDOWNLOADS completed downloads have been
#counted in the yt-dlp output. Afterwards, remove old media files and any
#directories left empty.
if [[ $SKIPDOWNLOADS = false ]]; then
  echo "Starting Downloads..."
  #Reset the log; yt-dlp output is appended to it so downloads can be counted.
  echo "" > "$TEMPFILE" || exit 1
  mkdir -p "$SHOWSFOLDER" || exit 1
  #Drop blank lines and lines starting with '#', then split each remaining
  #line on ';' into its fields.
  grep -vE '^(\s*$|#)' "$URLSFILE" | while IFS=';' read -ra LINE; do
    #Field 0: the URL, with all whitespace stripped. Done with parameter
    #expansion so characters such as '?' in the URL are never glob-expanded.
    URL=${LINE[0]//[[:space:]]/}
    #Field 1: title filter (default: match everything)
    TITLEFILTER=${LINE[1]:-.*}
    #Field 2: how many playlist items to check (default: $PLAYLISTEND)
    URLPLAYLISTEND=${LINE[2]:-$PLAYLISTEND}
    #Field 3: format override (default: $FORMAT)
    URLFORMAT=${LINE[3]:-$FORMAT}
    #Optional extras; move a line into the command below to enable it:
    # --write-sub --write-auto-sub --sub-lang "en.*" \
    # --simulate --verbose \
    #stdin comes from /dev/null so neither yt-dlp nor any helper it spawns
    #can swallow the rest of the URL list that this loop is reading.
    "$YTDL" \
      --socket-timeout 30 \
      --download-archive "$ARCHIVEFILE" \
      --dateafter "$FIRSTDATE" \
      --max-downloads "$URLDOWNLOADS" \
      --max-filesize "$MAXFILESIZE" \
      --playlist-end "$URLPLAYLISTEND" \
      --match-filter "$FILTER" \
      --match-title "$TITLEFILTER" \
      --output "$SHOWSFOLDER/$FILETEMPLATE" \
      --restrict-filenames \
      --format "$URLFORMAT" \
      --no-progress \
      --no-mtime \
      --ignore-errors \
      --no-overwrites \
      --continue \
      --force-ipv4 \
      "$URL" < /dev/null | tee -a "$TEMPFILE"
    #Count completed downloads across all entries processed so far.
    TOTALDOWNLOADS=$(grep -o 'Download completed' "$TEMPFILE" | wc -l)
    echo "Total downloads so far: $TOTALDOWNLOADS"
    if (( TOTALDOWNLOADS >= MAXDOWNLOADS )); then
      echo "Max downloads ($MAXDOWNLOADS) reached."
      break
    fi
  done
  #Clean up downloads folder: remove media/partial files whose modification
  #time is not newer than $CLEANUPDATE.
  echo "Cleaning Up Old Downloads..."
  find "$SHOWSFOLDER" \
    -type f \
    -regextype posix-egrep -regex ".*$DOWNLOADCLEANUPFILETYPES" \
    ! -newermt "$CLEANUPDATE" \
    -exec bash -c 'echo Removing "$0" && rm -- "$0"' {} \;
  #Remove Empty Directories
  find "$SHOWSFOLDER" -empty -type d -delete
else
  echo "Skipped Downloads"
fi
#Build playlists
#For each day in the range, write $PLAYLISTFOLDER/<day>/<day>.m3u8 listing the
#.mp4 files whose modification time falls on that day, plus an HTTP variant
#(<day>-http.m3u8). Then remove outdated playlists, broken symlinks and empty
#directories.
if [[ $SKIPPLAYLISTS = false ]]; then
  echo "Building Playlists..."
  mkdir -p "$PLAYLISTFOLDER" || exit 1
  #By-playlist layout: rebuild every day since $CLEANUPDATE.
  #By-date layout: only today's playlist is built.
  if [[ $DOWNLOADSBYDATE = false ]]; then
    d=$CLEANUPDATE
  else
    d=$LASTDATE
  fi
  while [[ "$d" -le "$LASTDATE" ]]; do
    #The day after $d: upper bound of the mtime window and next loop value.
    next=$(date --date="$d +1 day" +%Y%m%d) || exit 1
    playlist="$PLAYLISTFOLDER/$d/$d.m3u8"
    mkdir -p "$PLAYLISTFOLDER/$d" || exit 1
    echo '#EXTM3U' > "$playlist"
    #Build .m3u8 playlist from download date (file modified time)
    find "$SHOWSFOLDER" \
      -type f \
      -iname "*.mp4" \
      -newermt "$d" ! -newermt "$next" >> "$playlist"
    #Make a symlink for each playlist entry
    #(useful if you share this folder via DLNA or SMB to your Roku,
    #and unnecessary if the downloads are grouped by date)
    if [[ $DOWNLOADSBYDATE = false ]]; then
      #IFS= and -r keep each path exactly as find printed it (spaces and
      #backslashes included). Link name: <parent dir name>-<file name>.
      while IFS= read -r entry; do
        [[ "$entry" == "#EXTM3U" ]] && continue
        ln -s -f -- "$entry" "$PLAYLISTFOLDER/$d/$(basename -- "$(dirname -- "$entry")")-$(basename -- "$entry")"
      done < "$playlist"
    fi
    #Create an HTTP-served version of the playlist.
    #NOTE: $SHOWSFOLDER is used as a sed regex with '#' as the delimiter, so it
    #must not contain '#'; regex metacharacters in it are not escaped.
    sed "s#${SHOWSFOLDER}#${SHOWSROOTURL}#g" "$playlist" > "$PLAYLISTFOLDER/$d/$d-http.m3u8"
    #If the downloads and playlists are in the same folder, use relative paths.
    #(Right-hand side quoted so this is a string comparison, not a glob match.)
    if [[ "$SHOWSFOLDER" = "$PLAYLISTFOLDER" ]]; then
      sed -i "s#${SHOWSFOLDER}#\.\.#g" "$playlist"
    fi
    d=$next
  done
  echo "Cleaning Up Old Playlists..."
  #Clean up playlists - by filename: the first 8 characters of the file name
  #are read as a YYYYMMDD date and compared against $CLEANUPDATE. The 10#
  #prefix forces base 10 so a leading zero is not taken as octal.
  find "$PLAYLISTFOLDER" \
    -type f \
    -regextype posix-egrep -regex ".*\/[0-9]{8}[^/]*$PLAYLISTCLEANUPFILETYPES" \
    -exec bash -c 'fn=${0##*/}; d=${fn:0:8}; (( 10#$d < 10#$1 )) && echo Removing "$0" && rm -- "$0"' {} "$CLEANUPDATE" \;
  #Clean up - by modified date (if no date in filename)
  find "$PLAYLISTFOLDER" \
    -type f \
    -regextype posix-egrep -regex ".*$PLAYLISTCLEANUPFILETYPES" \
    ! -newermt "$CLEANUPDATE" \
    -exec bash -c 'echo Removing "$0" && rm -- "$0"' {} \;
  #Remove Empty Directories and broken Symlinks
  #(with -L, "-type l" only matches links whose target cannot be resolved)
  find -L "$PLAYLISTFOLDER" -type l -delete
  find "$PLAYLISTFOLDER" -empty -type d -delete
else
  echo "Skipped Building Playlists"
fi
echo "Done."
#Remove the temp file if one was created. Written as an if-statement rather
#than "[[ ... ]] && rm" because this is the last command of the script: with
#the short-circuit form, a missing temp file (e.g. with --skip-downloads)
#made the whole script exit with status 1.
if [[ -f "$TEMPFILE" ]]; then
  rm -f -- "$TEMPFILE"
fi