Adding the 'extract URLs' feature to the workflow
This commit is contained in:
parent
637b4df401
commit
3df5df2db4
3
Makefile
3
Makefile
@@ -4,3 +4,6 @@ help:
|
|||||||
pdftotext:
|
pdftotext:
|
||||||
@find ./data -iname '*.pdf' -execdir pdftotext {} \;
|
@find ./data -iname '*.pdf' -execdir pdftotext {} \;
|
||||||
@find ./data -not \( -path ./data/text -prune \) -iname '*.txt' -exec mv {} './data/text/' ';'
|
@find ./data -not \( -path ./data/text -prune \) -iname '*.txt' -exec mv {} './data/text/' ';'
|
||||||
|
|
||||||
|
extract:
|
||||||
|
@perl extract_urls.pl data/text/*.txt
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user