diff --git a/Makefile b/Makefile
index 89f5e70..109435a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,2 +1,11 @@
 help:
 	@cat doc/help.txt
+
+.PHONY: help pdftotext
+
+# Convert every PDF found under ./data to text (pdftotext is run from each
+# PDF's own directory), then gather the .txt files into ./data/text/.
+pdftotext:
+	@mkdir -p ./data/text
+	@find ./data -iname '*.pdf' -execdir pdftotext {} \;
+	@find ./data -not \( -path ./data/text -prune \) -iname '*.txt' -exec mv {} './data/text/' ';'
diff --git a/doc/help.txt b/doc/help.txt
index 508455a..6056996 100644
--- a/doc/help.txt
+++ b/doc/help.txt
@@ -1,2 +1,6 @@
 With this command, you'll be able to manage easily the extraction of
 URLs from books scanned by KBR
+
+Here is the list of commands and what they do:
+* make pdftotext: this command extracts a text version of the PDF files
+  and moves these files to the data/text/ directory