Adding the 'extract URLs' feature to the workflow

This commit is contained in:
Emmanuel Di Pretoro 2018-08-24 13:42:24 +02:00
parent 637b4df401
commit 3df5df2db4

View File

@@ -4,3 +4,6 @@ help:
pdftotext: pdftotext:
@find ./data -iname '*.pdf' -execdir pdftotext {} \; @find ./data -iname '*.pdf' -execdir pdftotext {} \;
@find ./data -not \( -path ./data/text -prune \) -iname '*.txt' -exec mv {} './data/text/' ';' @find ./data -not \( -path ./data/text -prune \) -iname '*.txt' -exec mv {} './data/text/' ';'
extract:
@perl extract_urls.pl data/text/*.txt