tbaka-images2note/Analyzing images.ipynb

100 lines
2.3 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"import pathlib\n",
"\n",
"import ollama\n",
"import filetype"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"DATA_PATH = pathlib.Path(\"./data/\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"def cleaning_image_filenames(dir: pathlib.Path):\n",
"    \"\"\"Replace spaces with underscores in every file name below ``dir``.\n",
"\n",
"    The tree rooted at ``dir`` is walked recursively and each file is\n",
"    renamed in place, i.e. it stays in the directory it was found in.\n",
"\n",
"    Args:\n",
"        dir: Root directory whose files are renamed.\n",
"    \"\"\"\n",
"    for path, _dirs, files in dir.resolve().walk():\n",
"        for file in files:\n",
"            new_name = file.replace(' ', '_')\n",
"            if new_name == file:\n",
"                # Nothing to clean for this file.\n",
"                continue\n",
"            # Anchor the target in the file's own directory: a bare name\n",
"            # would be interpreted relative to the current working\n",
"            # directory and move the file out of the data tree.\n",
"            target = path / new_name\n",
"            if target.exists():\n",
"                # Do not silently overwrite a different file that already\n",
"                # carries the cleaned name.\n",
"                print(f'Skipping {path / file}: {target} already exists')\n",
"                continue\n",
"            (path / file).rename(target)\n",
"\n",
"cleaning_image_filenames(DATA_PATH)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"def get_output(file: pathlib.Path, lang: str) -> pathlib.Path:\n",
"    \"\"\"Return the path of the Markdown description that belongs to ``file``.\n",
"\n",
"    The last suffix of ``file`` is replaced by ``.<lang>.md``, so ``cat.png``\n",
"    with ``lang='en'`` yields ``cat.en.md`` in the same directory.\n",
"\n",
"    Args:\n",
"        file: Path of an existing file.\n",
"        lang: Language tag inserted before the ``.md`` extension.\n",
"\n",
"    Raises:\n",
"        AssertionError: If ``file`` is not an existing regular file.\n",
"    \"\"\"\n",
"    assert file.is_file(), \"Checking if this is a file.\"\n",
"    return file.with_suffix(f'.{lang}.md')\n",
"\n",
"\n",
"# Ask the llava model to describe every image below DATA_PATH and store the\n",
"# answer as a Markdown note next to the image.\n",
"prompt = 'Describe this image'\n",
"for path, _dirs, files in DATA_PATH.resolve().walk():\n",
"    for file in files:\n",
"        image_path = path / file\n",
"        if filetype.is_image(str(image_path)):\n",
"            r = ollama.chat(\n",
"                model=\"llava\",\n",
"                messages=[{\n",
"                    'role': 'user',\n",
"                    'content': prompt,\n",
"                    'images': [image_path.resolve()]\n",
"                }]\n",
"            )\n",
"            output_description = get_output(image_path, 'en')\n",
"            # Write as UTF-8 explicitly: the model's reply may contain\n",
"            # characters the platform's locale encoding cannot represent.\n",
"            output_description.write_text(f\"\"\"\n",
"## Image `{file}`\n",
"\n",
"![]({image_path})\n",
"\n",
"## {prompt}\n",
"\n",
"::: {{.border}}\n",
"{r['message']['content']}\n",
":::\n",
"\n",
"\"\"\", encoding='utf-8')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}