Movie image fetcher — Robin Wils's website

Last modified: Sun, Oct 23, 2022

PowerShell command

$apiKey="" # put your themoviedb.org API key here

# For every review file: derive a search term from the file name, look the
# title up on TMDB (movie search first, TV search as fallback) and download
# the poster as the review's og-image. Files that already have an image are
# skipped. Requires PowerShell 7+ (ForEach-Object -Parallel).
Get-ChildItem "~/Documents/GitProjects/robinwils.com/content/escapism/movies-and-series/reviews/" -File | Foreach-Object -ThrottleLimit 3 -Parallel {
    $fileName = $_.BaseName
    $imagePath = "~/Documents/GitProjects/robinwils.com/static/images/escapism/movies-and-series/reviews/og-images/${fileName}.jpg"
    # Parallel runspaces can be reused between iterations, so start every
    # iteration from a clean state. Resetting here (rather than after a
    # successful download) also covers the "poster not found" path.
    $year = $null
    $moviePoster = $null
    # Only fetch image if it does not exist yet
    If (-Not (Test-Path -Path $imagePath)) {
      # Remove filler from fileNames
      $movieToSearch=$fileName.
        Replace('-', ' ').
        Replace('franchise', '').
        Replace('movies', '').
        Replace('movie', '').
        Replace('series', '').
        Replace('biographical', '').
        Replace('animated', '').
        Replace('anime', '').
        Replace('musical', '').
        Replace('(', '').
        Replace(')', '').
        # Trim string
        Trim().
        # hardcoded exceptions
        Replace('the girl next door and an american crime', 'the girl next door 2007').
        Replace('braindead also known as dead alive', 'braindead 1992').
        Replace('the simpsons', 'the simpsons movie').
        Replace('come and see', 'come and see 1985')
      # Fetch year argument if year is present in movieToSearch
      # (a trailing 4-digit number is treated as the release year)
      If ($movieToSearch -match '.+?[0-9][0-9][0-9][0-9]$') {
        $year=$movieToSearch.Substring($movieToSearch.Length - 4)
        $movieToSearch=$movieToSearch.Substring(0, $movieToSearch.Length - 4).TrimEnd()
      }
      # Build search query; the title is URL-encoded so characters such as
      # '&' or '#' cannot corrupt the query string
      $encodedQuery=[uri]::EscapeDataString($movieToSearch)
      $searchQuery="query=${encodedQuery}&api_key=${using:apiKey}&page=1&include_adult=true"
      If ($year) {
        $searchQuery="${searchQuery}&year=${year}"
      }

      # Attempt to fetch poster. -SkipHttpErrorCheck means an error response
      # is returned as a normal object without a results list, so check
      # before indexing into it.
      $response=Invoke-RestMethod -Uri "https://api.themoviedb.org/4/search/movie?${searchQuery}" -Method Get -SkipHttpErrorCheck
      If ($response.results) {
        $moviePoster=$response.results[0].poster_path
      }

      # If it can't find it, it is possible that it is a series
      If (!$moviePoster) {
        $response=Invoke-RestMethod -Uri "https://api.themoviedb.org/4/search/tv?${searchQuery}" -Method Get -SkipHttpErrorCheck
        If ($response.results) {
          $moviePoster=$response.results[0].poster_path
        }
      }
      If (!$moviePoster) {
        Write-Output "Couldn't find poster for $movieToSearch"
      }
      Else {
        # Download poster
        Invoke-WebRequest "https://www.themoviedb.org/t/p/w300_and_h450_bestv2$moviePoster" -OutFile $imagePath
      }
    }
}

# Optional, reduce image size with exiftool
exiftool -all= -r -overwrite_original ~/Documents/GitProjects/robinwils.com/static/images

Bash

Previously I did this in bash, but it might no longer be valid.

getIdLists.sh

apiKey="secret" # put your themoviedb.org API key here

# lookup_ids REVIEWS_DIR API_KEY
# Runs the movie and TV id lookups once for every *.md file in REVIEWS_DIR,
# passing the file's base name and the API key to each helper script.
lookup_ids() {
  local reviews_dir key file name
  reviews_dir=$1
  key=$2
  for file in "$reviews_dir"/*.md; do
    # An unmatched glob stays literal; skip it instead of looking up "*.md".
    [ -e "$file" ] || continue
    name=${file##*/}
    # The helpers use bash-only syntax (${var//...}, [[ =~ ]]), so they must
    # be run with bash rather than sh.
    bash getMovieId.sh "$name" "$key"
    bash getTVId.sh "$name" "$key"
  done
}

lookup_ids ~/Documents/Projects/robinwils.com/content/categories/reviews "$apiKey"

getMovieId.sh

#!/usr/bin/env bash
# Usage: getMovieId.sh <review-file-name> <api-key>
# Looks the review up in TMDB's movie search and appends "<id>;<file-name>"
# to movie-ids.txt, unless that file name is already recorded there.

# clean_query NAME
# Prints NAME turned into a search term: dashes become spaces, filler words
# and the ".md" extension are removed, surrounding whitespace is trimmed.
clean_query() {
  local cleaned=$1 filler
  cleaned=${cleaned//-/ }
  # "movies" must be removed before "movie" so no stray "s" is left behind.
  for filler in franchise movies movie series .md; do
    cleaned=${cleaned//"$filler"/}
  done
  cleaned=${cleaned#"${cleaned%%[![:space:]]*}"}
  cleaned=${cleaned%"${cleaned##*[![:space:]]}"}
  printf '%s\n' "$cleaned"
}

# is_recorded NAME IDS_FILE
# Succeeds when IDS_FILE has a line whose part after the first ';' is NAME.
# The ids file stores "<id>;<name>" lines, so a whole-line comparison against
# NAME alone can never match.
is_recorded() {
  local name=$1 ids_file=$2 line
  [[ -f $ids_file ]] || return 1
  while IFS= read -r line || [[ -n $line ]]; do
    [[ ${line#*;} == "$name" ]] && return 0
  done < "$ids_file"
  return 1
}

main() {
  if (( $# < 2 )); then
    echo "usage: getMovieId.sh <review-file-name> <api-key>" >&2
    return 2
  fi
  local query=$1 apiKey=$2 ids_file=movie-ids.txt
  local actualQuery id

  if is_recorded "$query" "$ids_file"; then
    echo "Movie already present"
    return 0
  fi

  actualQuery=$(clean_query "$query")
  printf '%s\n' "$actualQuery"
  # First search hit's id; "// empty" yields nothing for error responses or
  # empty result lists instead of the string "null".
  id=$(curl -sS -G "https://api.themoviedb.org/3/search/movie?api_key=$apiKey&page=1&include_adult=true" \
    --data-urlencode "query=$actualQuery" | jq -r '.results[0].id // empty')
  if [[ $id =~ ^[0-9]+$ ]]; then
    printf '%s;%s\n' "$id" "$query" >> "$ids_file"
  else
    echo "error: Not a number"
  fi
  # Pause between requests to stay gentle on the API.
  sleep 5
}

main "$@"

getTVId.sh

#!/usr/bin/env bash
# Usage: getTVId.sh <review-file-name> <api-key>
# Looks the review up in TMDB's TV search and appends "<id>;<file-name>"
# to tv-ids.txt, unless that file name is already recorded there.

# clean_query NAME
# Prints NAME turned into a search term: dashes become spaces, filler words
# and the ".md" extension are removed, surrounding whitespace is trimmed.
clean_query() {
  local cleaned=$1 filler
  cleaned=${cleaned//-/ }
  # "movies" must be removed before "movie" so no stray "s" is left behind.
  for filler in franchise movies movie series .md; do
    cleaned=${cleaned//"$filler"/}
  done
  cleaned=${cleaned#"${cleaned%%[![:space:]]*}"}
  cleaned=${cleaned%"${cleaned##*[![:space:]]}"}
  printf '%s\n' "$cleaned"
}

# is_recorded NAME IDS_FILE
# Succeeds when IDS_FILE has a line whose part after the first ';' is NAME.
# The ids file stores "<id>;<name>" lines, so a whole-line comparison against
# NAME alone can never match.
is_recorded() {
  local name=$1 ids_file=$2 line
  [[ -f $ids_file ]] || return 1
  while IFS= read -r line || [[ -n $line ]]; do
    [[ ${line#*;} == "$name" ]] && return 0
  done < "$ids_file"
  return 1
}

main() {
  if (( $# < 2 )); then
    echo "usage: getTVId.sh <review-file-name> <api-key>" >&2
    return 2
  fi
  # The duplicate check reads the same file this script appends to.
  # NOTE(review): the previous version consulted movie-ids.txt here, which
  # looks like a copy-paste slip — confirm that per-file dedup is intended.
  local query=$1 apiKey=$2 ids_file=tv-ids.txt
  local actualQuery id

  if is_recorded "$query" "$ids_file"; then
    echo "Movie already present"
    return 0
  fi

  actualQuery=$(clean_query "$query")
  printf '%s\n' "$actualQuery"
  # First search hit's id; "// empty" yields nothing for error responses or
  # empty result lists instead of the string "null".
  id=$(curl -sS -G "https://api.themoviedb.org/3/search/tv?api_key=$apiKey&page=1&include_adult=true" \
    --data-urlencode "query=$actualQuery" | jq -r '.results[0].id // empty')
  if [[ $id =~ ^[0-9]+$ ]]; then
    printf '%s;%s\n' "$id" "$query" >> "$ids_file"
  else
    echo "error: Not a number"
  fi
  # Pause between requests to stay gentle on the API.
  sleep 5
}

main "$@"

imageFetch.sh

apiKey="secret" # put your themoviedb.org API key here

# fetch_from_list LIST_FILE FETCH_SCRIPT API_KEY
# Reads "<id>;<name>" lines from LIST_FILE and runs
# `sh FETCH_SCRIPT <id> <name> API_KEY` for each of them.
fetch_from_list() {
  local list_file fetch_script key line id name
  list_file=$1
  fetch_script=$2
  key=$3
  if [ ! -f "$list_file" ]; then
    echo "warning: $list_file not found, skipping" >&2
    return 0
  fi
  # Read line by line so names containing spaces or glob characters stay
  # intact. The list is opened on fd 3 so the fetch script cannot consume it
  # through its stdin.
  while IFS= read -r line <&3 || [ -n "$line" ]; do
    # Skip blank or malformed lines that carry no "<id>;<name>" pair.
    case $line in
      *';'*) ;;
      *) continue ;;
    esac
    id=${line%%;*}
    name=${line#*;}
    sh "$fetch_script" "$id" "$name" "$key"
  done 3< "$list_file"
}

fetch_from_list movie-ids.txt imageFetchByIdMovie.sh "$apiKey"
fetch_from_list tv-ids.txt imageFetchByIdTV.sh "$apiKey"

imageFetchByIdMovie.sh

# Usage: imageFetchByIdMovie.sh <tmdb-movie-id> <name> <api-key>
# Downloads the poster of the given TMDB movie to images/<name>.jpg.

# fetch_poster ID NAME API_KEY
# Prints "success" and returns 0 when the poster was saved; otherwise prints
# an error and returns 1. An image that already exists is only replaced by a
# complete download and is never deleted on failure.
fetch_poster() {
  local id name apiKey target partial image
  id=$1
  name=$2
  apiKey=$3
  target="images/$name.jpg"
  partial="$target.part"

  # "// empty" turns a missing/null poster_path into an empty string instead
  # of the literal text "null" ending up in the download URL.
  image=$(curl -sS "https://api.themoviedb.org/3/movie/$id?api_key=$apiKey" | jq -r '.poster_path // empty')
  if [ -z "$image" ]; then
    echo "error - no poster found for $name"
    return 1
  fi

  mkdir -p images
  # wget -O truncates its output file up front, so download next to the
  # target and only move the file into place once it is complete.
  if wget -O "$partial" "https://www.themoviedb.org/t/p/w300_and_h450_bestv2$image" && [ -s "$partial" ]; then
    mv -f -- "$partial" "$target"
    echo "success"
  else
    rm -f -- "$partial"
    echo "error - could not download images/$name.jpg"
    return 1
  fi
}

main() {
  if [ "$#" -lt 3 ]; then
    echo "usage: imageFetchByIdMovie.sh <tmdb-movie-id> <name> <api-key>" >&2
    return 2
  fi
  fetch_poster "$1" "$2" "$3"
}

main "$@"

imageFetchByIdTV.sh

# Usage: imageFetchByIdTV.sh <tmdb-tv-id> <name> <api-key>
# Downloads the poster of the given TMDB TV show to images/<name>.jpg.

# fetch_poster ID NAME API_KEY
# Prints "success" and returns 0 when the poster was saved; otherwise prints
# an error and returns 1. An image that already exists is only replaced by a
# complete download and is never deleted on failure.
fetch_poster() {
  local id name apiKey target partial image
  id=$1
  name=$2
  apiKey=$3
  target="images/$name.jpg"
  partial="$target.part"

  # "// empty" turns a missing/null poster_path into an empty string instead
  # of the literal text "null" ending up in the download URL.
  image=$(curl -sS "https://api.themoviedb.org/3/tv/$id?api_key=$apiKey" | jq -r '.poster_path // empty')
  if [ -z "$image" ]; then
    echo "error - no poster found for $name"
    return 1
  fi

  mkdir -p images
  # wget -O truncates its output file up front, so download next to the
  # target and only move the file into place once it is complete.
  if wget -O "$partial" "https://www.themoviedb.org/t/p/w300_and_h450_bestv2$image" && [ -s "$partial" ]; then
    mv -f -- "$partial" "$target"
    echo "success"
  else
    rm -f -- "$partial"
    echo "error - could not download images/$name.jpg"
    return 1
  fi
}

main() {
  if [ "$#" -lt 3 ]; then
    echo "usage: imageFetchByIdTV.sh <tmdb-tv-id> <name> <api-key>" >&2
    return 2
  fi
  fetch_poster "$1" "$2" "$3"
}

main "$@"