Updated msword-search
This commit is contained in:
parent
4ddce7cfbe
commit
387062074a
1 changed files with 54 additions and 53 deletions
|
|
@ -9,10 +9,10 @@ import sys
|
||||||
# STATIC VARIABLES
|
# STATIC VARIABLES
|
||||||
SCANDIR = os.getcwd()
|
SCANDIR = os.getcwd()
|
||||||
USAGE = '''Usage: {script} <search-terms>...
|
USAGE = '''Usage: {script} <search-terms>...
|
||||||
e.g. {script} "Book Title" "Keyword" "etc"
|
e.g. {script} "Book Title" "Keyword" "etc"
|
||||||
|
|
||||||
This script will search all doc/docx files below the current directory for
|
This script will search all doc/docx files below the current directory for
|
||||||
the search-terms provided (case-insensitive).'''.format(script=__file__)
|
the search-terms provided (case-insensitive).'''.format(script=__file__)
|
||||||
|
|
||||||
# Init
|
# Init
|
||||||
os.chdir(os.path.dirname(os.path.realpath(__file__)))
|
os.chdir(os.path.dirname(os.path.realpath(__file__)))
|
||||||
|
|
@ -23,59 +23,60 @@ init_global_vars()
|
||||||
# Matches file names ending in .doc or .docx (case-insensitive)
REGEX_DOC_FILES = re.compile(r'\.docx?$', re.IGNORECASE)

def scan_for_docs(path):
  """Recursively yield os.DirEntry objects for Word documents below path.

  Real sub-directories are descended into; symlinked directories are not
  followed. Only entries that are files and whose name ends in .doc/.docx
  (any case) are yielded.
  """
  for entry in os.scandir(path):
    if entry.is_dir(follow_symlinks=False):
      yield from scan_for_docs(entry.path)
    # is_file must be *called*; the bare bound method is always truthy and
    # would let non-file entries (e.g. a symlink to a directory named
    # "foo.docx") through.
    elif entry.is_file() and REGEX_DOC_FILES.search(entry.name):
      yield entry
|
||||||
|
|
||||||
def scan_file(file_path, search):
  """Search the text of a single doc/docx file for a regex.

  file_path: path of the document to scan.
  search:    regular expression, matched case-insensitively.

  Returns file_path if the extracted text matches, otherwise None.
  Extraction is done by external tools via run_program: `unzip -p` for
  .docx files and `antiword` for everything else (assumed to be .doc).
  """
  match = None
  try:
    # Use the file_path argument rather than relying on a global loop
    # variable that happens to exist in the caller.
    if file_path.lower().endswith('.docx'):
      result = run_program(['unzip', '-p', file_path])
    else:
      # Assuming .doc
      result = run_program(['antiword', file_path])
    # The extracted stream can contain non-text bytes (unzip -p dumps every
    # archive member); drop undecodable bytes instead of failing the whole
    # file on a strict decode.
    out = result.stdout.decode(errors='ignore')
    match = re.search(search, out, re.IGNORECASE)
  except Exception:
    # Ignore errors since files may be corrupted
    pass

  return file_path if match else None
|
||||||
|
|
||||||
if __name__ == '__main__':
  # Script entry point: print usage when no terms are given, otherwise scan
  # every doc/docx file below SCANDIR and list the paths that match.
  try:
    # Prep
    clear_screen()

    if len(sys.argv) == 1:
      # Print usage
      print_standard(USAGE)
    else:
      # Each term is used as a regex fragment. Any run of whitespace in a
      # term is replaced with \s* so it matches any amount of whitespace
      # (or none) in the document. The backslash in the replacement is
      # doubled because an unknown escape such as \s in a replacement
      # template is an error on Python 3.7+.
      terms = [re.sub(r'\s+', r'\\s*', t) for t in sys.argv[1:]]
      search = '({})'.format('|'.join(terms))

      # Collect the paths of matching files (scan_file returns None for
      # non-matching entries, which are skipped).
      matches = []
      for entry in scan_for_docs(SCANDIR):
        found = scan_file(entry.path, search)
        if found:
          matches.append(found)

      if matches:
        print_success('Found {} {}:'.format(
          len(matches),
          'Matches' if len(matches) > 1 else 'Match'))
        for match in matches:
          print_standard(match)
      else:
        print_error('No matches found.')

    # Done
    print_standard('\nDone.')
    #pause("Press Enter to exit...")
    exit_script()
  except SystemExit:
    pass
  except:
    major_exception()
|
||||||
|
|
||||||
|
# vim: sts=2 sw=2 ts=2
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue