#!/bin/bash
#
## Wizard Kit: MS Word content search tool
#
# Usage: run from the directory holding the documents, passing one or more
# search patterns as arguments. Every file in the current directory whose
# name ends in "doc" (read with antiword) or "docx" (read with unzip) is
# searched case-insensitively for each pattern. Names of matching files are
# printed as they are found and appended, sorted and de-duplicated, to
# $HOME/msword-matches.txt.
#
# NOTE: "set -o pipefail" is deliberately not enabled. grep -q exits on the
# first hit, which can terminate the extractor with SIGPIPE; under pipefail
# that would turn a genuine match into a failed pipeline.

#######################################
# Turn a user-supplied pattern into the regex handed to grep -P.
# Every run of whitespace becomes \s* so that a phrase still matches when
# the document spaces its words differently (or not at all).
# Arguments: $1 - search pattern
# Outputs:   the converted regex on stdout
#######################################
build_regex() {
  # printf rather than echo: echo would swallow patterns such as "-n".
  printf '%s\n' "$1" | sed -r 's/\s+/\\s*/g'
}

#######################################
# Test whether the text on stdin matches a regex (PCRE, case-insensitive).
# Arguments: $1 - regex
# Returns:   0 on a match, non-zero otherwise
#######################################
text_matches() {
  grep -iqsP "($1)"
}

#######################################
# Search the Word documents in the current directory for each pattern.
# Arguments: search patterns, one per argument
# Outputs:   "Possible match: <file>" per hit, plus a summary when the
#            results file is non-empty
#######################################
main() {
  local results_file="$HOME/msword-matches.txt"
  local -a matches=()
  local pattern regex doc count

  # "$@" keeps each pattern exactly as typed; an unquoted $* would let the
  # shell glob-expand patterns like "rev.*" against files in the directory.
  for pattern in "$@"; do
    regex=$(build_regex "$pattern")

    # Word Doc
    for doc in *doc; do
      # An unmatched glob is left as the literal string "*doc"; skip it.
      [[ -f "$doc" ]] || continue
      if antiword "$doc" | text_matches "$regex"; then
        echo "Possible match: $doc"
        matches+=("$doc")
      fi
    done

    # Word Docx
    for doc in *docx; do
      [[ -f "$doc" ]] || continue
      if unzip -p "$doc" word/document.xml | text_matches "$regex"; then
        echo "Possible match: $doc"
        matches+=("$doc")
      fi
    done
  done

  # Save results. Hits are collected in memory, so there is no temporary
  # file to leak if the script is interrupted.
  if (( ${#matches[@]} > 0 )); then
    printf '%s\n' "${matches[@]}" | sort -u >> "$results_file"
  fi

  # Done
  if [[ -s "$results_file" ]]; then
    # Read via stdin so wc prints only the number, not "N <path>".
    count=$(wc -l < "$results_file")
    echo "Found ${count//[[:space:]]/} possible matches"
    echo "The results have been saved to $HOME"
  fi
}

main "$@"