Initial commit

This commit is contained in:
2024-03-24 20:09:46 +01:00
commit f836e7c7ba
10 changed files with 1670 additions and 0 deletions

160
.gitignore vendored Executable file
View File

@ -0,0 +1,160 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

226
app/app.py Executable file
View File

@ -0,0 +1,226 @@
import tkinter as tk
from pathlib import Path
from tkinter import filedialog, messagebox
import cv2
import ttkbootstrap as ttk
from convert import differences, load_frames, select_frames
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolbar2Tk
from matplotlib.figure import Figure
from params import ParameterTuner
from PIL import Image, ImageTk
from preview import SlidePreview
class SlideConversionApp:
    """Tkinter GUI that extracts slide images from a video file and exports them as a PDF.

    From top to bottom the window contains a video file picker, a threshold slider with
    buttons to plot the frame differences and to fill the preview, a scrollable slide
    preview, and buttons to insert a frame manually and to write the PDF.

    Constructing an instance builds the window and then blocks in the Tk main loop.
    """

    # Initial value and range of the difference threshold slider. The upper bound is
    # replaced by the largest measured difference once a video has been loaded.
    _THRESHOLD_DEFAULT = 10
    _THRESHOLD_MIN = 1
    _THRESHOLD_MAX = 20

    def __init__(self):
        """Build all widgets, disable the controls until a video is loaded, and run the main loop."""
        self._root = tk.Tk()
        self._root.title("PDF Extraktion")
        self._root.geometry("1400x1000")
        self._frames = None
        self._diffs = None

        # file selection
        self._file_selection = tk.Frame(self._root)
        self._file_selection_label = tk.Label(self._file_selection, text="Videodatei:")
        self._file_selection_label.pack()
        self._file_selection_button = ttk.Button(
            self._file_selection, text="Datei auswählen", command=self._select_file
        )
        self._file_selection_button.pack()
        self._file_selection.pack()
        self._file = None
        ttk.Separator(self._root, orient=tk.HORIZONTAL).pack(fill=tk.X, pady=10, padx=10)

        # slide selection
        self._selection = tk.Frame(self._root)
        self._params = ParameterTuner(self._selection)
        self._param_threshold = self._params.add(
            "Schwellenwert", self._THRESHOLD_DEFAULT, self._THRESHOLD_MIN, self._THRESHOLD_MAX
        )
        self._params.pack(side=tk.LEFT, padx=10)
        self._show_diffs_btn = ttk.Button(self._selection, text="Unterschiede anzeigen", command=self._show_diffs)
        self._show_diffs_btn.pack(side=tk.LEFT, padx=10)
        self._add_selected_btn = ttk.Button(self._selection, text="Folien hinzufügen", command=self._add_selection)
        self._add_selected_btn.pack(side=tk.LEFT, padx=10)
        self._selection.pack()
        ttk.Separator(self._root, orient=tk.HORIZONTAL).pack(fill=tk.X, pady=10, padx=10)

        # generated images
        self._images = SlidePreview(self._root, columns=6)
        self._images.pack(fill=tk.BOTH, padx=10, expand=True)
        ttk.Separator(self._root, orient=tk.HORIZONTAL).pack(fill=tk.X, pady=10, padx=10)

        # buttons
        self._buttons = tk.Frame(self._root)
        self._buttons.pack(side=tk.RIGHT, pady=(0, 5))
        self._button_create_pdf = ttk.Button(self._buttons, text="PDF erstellen", command=self._create_pdf)
        self._button_create_pdf.pack(side=tk.RIGHT, padx=10)
        self._button_add_frame = ttk.Button(self._buttons, text="Manuell hinzufügen", command=self._add_manually)
        self._button_add_frame.pack(side=tk.RIGHT, padx=10)

        self._set_interaction_state(tk.DISABLED)
        self._root.mainloop()

    def _select_file(self):
        """Ask the user for a video file and load it unless the dialog was cancelled."""
        file = filedialog.askopenfilename(
            title="Videodatei auswählen",
            filetypes=[
                ("Videodateien", "*.mp4"),
                ("Alle Dateien", "*.*"),
            ],
        )
        # A cancelled dialog yields an empty string or an empty tuple; both are falsy.
        if not file:
            return
        self._file_selected(Path(file))

    def _file_selected(self, file: Path):
        """Load the sampled frames of ``file`` and compute their pairwise differences.

        A modal progress window is shown while loading. If the video cannot be opened
        the current state is left untouched; if no frame can be decoded the previously
        loaded frames are dropped and an error is shown.
        """
        cap = cv2.VideoCapture(file.as_posix())
        try:
            if not cap.isOpened():
                messagebox.showerror(
                    title="Videodatei auswählen",
                    message="Die Videodatei konnte nicht geöffnet werden.",
                )
                return

            # Drop the previous video before decoding the next one.
            self._frames = None
            self._diffs = None

            # analyze file; sample one frame per second, but never step by zero frames
            fps = max(1, int(cap.get(cv2.CAP_PROP_FPS)))
            num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            # load frames
            status = tk.Toplevel(self._root)
            status.title("Lade Videoframes ..")
            # Closing the window while loading would make the progress updates fail.
            status.protocol("WM_DELETE_WINDOW", lambda: None)
            status.grab_set()
            status_label = tk.Label(status, text=f"Lade Videoframes (0/{num_frames})")
            status_label.pack(padx=10, pady=10)
            status_pb = ttk.Progressbar(
                status, bootstyle="progress", orient=tk.HORIZONTAL, length=200, mode="determinate"
            )
            status_pb.pack(padx=10, pady=10)

            def callback(text: str, frame_no: int, max_no: int):
                percent = int((frame_no / max_no) * 100) if max_no else 0
                status_label.configure(text=f"{text} ({frame_no}/{max_no})")
                status_pb.configure(value=percent)
                # Process pending events so the window repaints during the blocking load.
                status.update()

            try:
                frames = load_frames(cap, fps, lambda f: callback("Lade Videoframes", f, num_frames))
                diffs = differences(frames, lambda f: callback("Berechne Unterschiede", f, len(frames) - 1))
            finally:
                # Always give the input grab back, even if loading failed.
                status.grab_release()
                status.destroy()
        finally:
            cap.release()

        if not frames:
            messagebox.showerror(
                title="Videodatei auswählen",
                message="Die Videodatei enthält keine lesbaren Frames.",
            )
            return

        # update values & parameters
        self._frames = frames
        self._diffs = diffs
        # Keep the slider range valid even if all differences are below the minimum.
        upper = max(self._THRESHOLD_MIN, int(max(diffs, default=0)))
        self._params.get_slider(self._param_threshold).configure(to=upper)
        self._file_selection_button.configure(text=file.name)
        self._file = file
        self._images.clear()
        self._set_interaction_state(tk.NORMAL)

    def _set_interaction_state(self, state: str):
        """Apply ``state`` (``tk.NORMAL`` or ``tk.DISABLED``) to the sliders and action buttons."""
        elements = [self._button_create_pdf, self._show_diffs_btn, self._add_selected_btn, self._button_add_frame]
        self._params.set_state(state)
        for element in elements:
            element.configure(state=state)

    def _show_diffs(self):
        """Open a modal window that plots the differences between consecutive frames."""
        if self._diffs is None:
            return
        diffs = tk.Toplevel(self._root)
        diffs.title("Unterschiede zwischen Frames")
        diffs.grab_set()
        fig = Figure(figsize=(10, 5), dpi=100)
        plot = fig.add_subplot(111)
        plot.plot(self._diffs)
        canvas = FigureCanvasTkAgg(fig, master=diffs)
        canvas.draw()
        canvas.get_tk_widget().pack()
        toolbar = NavigationToolbar2Tk(canvas, diffs)
        toolbar.update()

    def _add_selection(self):
        """Replace the preview with the frames whose difference exceeds the threshold."""
        if not self._frames or self._diffs is None:
            return
        self._images.clear()
        images = select_frames(self._frames, self._diffs, threshold=self._params.get(self._param_threshold))
        for image in images:
            self._images.add(image)

    def _create_pdf(self):
        """Ask for a target path and write all preview slides into one PDF file."""
        if len(self._images) == 0:
            messagebox.showerror(title="PDF erstellen", message="Keine Folien vorhanden.")
            return
        file = filedialog.asksaveasfilename(
            title="Speichern Unter",
            filetypes=[
                ("PDF", "*.pdf"),
                ("Alle Dateien", "*.*"),
            ],
        )
        # A cancelled dialog yields an empty string or an empty tuple; both are falsy.
        if not file:
            return
        file = Path(file).with_suffix(".pdf")
        images = self._images.get_images()
        try:
            images[0].save(file, "PDF", resolution=100.0, save_all=True, append_images=images[1:])
        except OSError as error:
            messagebox.showerror(
                title="PDF erstellen",
                message=f"Die PDF-Datei konnte nicht gespeichert werden:\n{error}",
            )
            return
        messagebox.showinfo(title="PDF erstellen", message="Erfolgreich!")

    def _add_manually(self):
        """Open a frame browser and insert the chosen frame behind the selected slide."""
        if not self._frames:
            return
        if self._images._selected_index == -1:
            messagebox.showerror(
                title="Manuelles Hinzufügen",
                message="Wähle zunächst eine Folie zum dahinter einfügen aus",
            )
            return
        last_frame = len(self._frames) - 1

        frameview = tk.Toplevel(self._root)
        frameview.title("Manuell Hinzufügen")
        frameview.grab_set()
        thumbnail = tk.Label(frameview)
        thumbnail.pack(padx=10, pady=5)

        def show(frame_no: int):
            # Reverse the channel axis: OpenCV delivers BGR, PIL expects RGB.
            img = Image.fromarray(self._frames[frame_no][:, :, ::-1])
            img.thumbnail(size=(512, 512))
            # Keep a reference on the label so the image is not garbage collected.
            thumbnail.current = ImageTk.PhotoImage(img)
            thumbnail.configure(image=thumbnail.current)

        show(0)
        selected = tk.IntVar(value=0)
        selected.trace_add("write", lambda v, i, m: show(selected.get()))

        def step(delta: int):
            # Move the selection by ``delta`` frames, clamped to the valid range.
            selected.set(min(max(selected.get() + delta, 0), last_frame))

        framesel = tk.Frame(frameview)
        framesel.pack(padx=10, pady=5)
        back_btn = ttk.Button(framesel, text="<", command=lambda: step(-1))
        next_btn = ttk.Button(framesel, text=">", command=lambda: step(1))
        slider = ttk.Scale(framesel, from_=0, to=last_frame, variable=selected, length=300)
        back_btn.pack(side=tk.LEFT)
        slider.pack(side=tk.LEFT, padx=5)
        next_btn.pack(side=tk.LEFT)

        def add_image():
            self._images.insert(
                Image.fromarray(self._frames[selected.get()][:, :, ::-1]),
                self._images._selected_index + 1,
            )

        add_btn = ttk.Button(frameview, text="Hinzufügen", command=add_image)
        add_btn.pack(pady=5)
if __name__ == "__main__":
    # Constructing the app builds the window and blocks in the Tk main loop.
    app = SlideConversionApp()

50
app/convert.py Executable file
View File

@ -0,0 +1,50 @@
from typing import Callable
import cv2
import numpy as np
from PIL import Image
from tqdm import tqdm, trange
def compare(frame1, frame2):
    """Return the mean absolute element-wise difference between two frames."""
    return np.mean(cv2.absdiff(frame1, frame2))
def load_frames(cap: "cv2.VideoCapture", frame_interval: int, progress: Callable) -> list:
    """Decode every ``frame_interval``-th frame of an opened video capture.

    Frames in between are only grabbed (advanced past), not retrieved.

    Args:
        cap: Opened video capture, positioned at the first frame to sample.
        frame_interval: Number of frames to advance between two sampled frames; at least 1.
        progress: Called after each step with the number of frames processed so far,
            capped at the frame count reported by ``cap``.

    Returns:
        The sampled frames in video order, as returned by ``cap.retrieve()``.

    Raises:
        ValueError: If ``frame_interval`` is smaller than 1 (the loop would never advance).
    """
    if frame_interval < 1:
        raise ValueError(f"frame_interval must be at least 1, got {frame_interval}")

    frames = []
    num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    position = 0
    grabbed = cap.grab()
    while position < num_frames:
        if grabbed:
            # Only keep the frame if decoding actually succeeded.
            decoded, frame = cap.retrieve()
            if decoded:
                frames.append(frame)
        for _ in range(frame_interval):
            grabbed = cap.grab()
        position += frame_interval
        # The last step may run past the end; never report more than the total.
        progress(min(position, num_frames))
    return frames
def differences(frames: list, progress: Callable) -> list[float]:
    """Compare each frame with its successor.

    Args:
        frames: Frames in video order.
        progress: Called after each comparison with the 1-based number of comparisons done.

    Returns:
        One ``compare`` result per adjacent pair, i.e. ``len(frames) - 1`` values
        (an empty list for fewer than two frames).
    """
    diffs = []
    for index in range(1, len(frames)):
        diffs.append(compare(frames[index - 1], frames[index]))
        progress(index)
    return diffs
def select_frames(frames: list, diffs: list, threshold: float) -> "list[Image.Image]":
    """Pick the first frame plus every frame that differs enough from its predecessor.

    Args:
        frames: Frames in video order.
        diffs: Difference of each frame to its predecessor; ``diffs[i]`` belongs to
            ``frames[i + 1]``, so it must hold exactly ``len(frames) - 1`` values.
        threshold: A frame is kept when its difference is strictly greater than this value.

    Returns:
        The selected frames converted to PIL images, or an empty list when ``frames`` is empty.

    Raises:
        ValueError: If ``diffs`` does not hold exactly one value per frame after the first.
    """
    if len(frames) == 0:
        return []

    selected_frames = [frames[0]]
    selected_frames.extend(
        frame for frame, diff in zip(frames[1:], diffs, strict=True) if diff > threshold
    )
    # Reverse the channel axis: OpenCV delivers BGR, PIL expects RGB.
    return [Image.fromarray(frame[:, :, ::-1]) for frame in selected_frames]

77
app/params.py Executable file
View File

@ -0,0 +1,77 @@
import tkinter as tk
from dataclasses import dataclass
import ttkbootstrap as ttk
@dataclass
class Parameter:
    """A tunable value: its Tk variable together with the slider that edits it."""

    # Tk variable holding the current value
    var: tk.IntVar | tk.DoubleVar
    # Scale widget created for this parameter
    slider: ttk.Scale
class ParameterTuner(tk.Frame):
    """Frame holding a row of labelled sliders, one per numeric parameter.

    Parameters are registered with ``add`` and afterwards addressed by the integer
    id that ``add`` returns.
    """

    def __init__(self, parent, *args, **kwargs):
        """Create an empty tuner; all arguments are forwarded to ``tk.Frame``."""
        # Keyword arguments must be forwarded with ``**``; a single ``*`` would pass
        # the option names as positional arguments.
        super().__init__(parent, *args, **kwargs)
        self._params: dict[int, Parameter] = {}

    def add(self, name: str, value: int | float, min_value: int | float, max_value: int | float) -> int:
        """Add a labelled slider and return the id of the new parameter.

        Args:
            name: Text shown in front of the current value.
            value: Initial value; an ``int`` creates an integer parameter, anything
                else a float parameter displayed with two decimals.
            min_value: Lower end of the slider.
            max_value: Upper end of the slider.

        Returns:
            The id to pass to ``get``, ``set``, ``get_slider`` and ``set_state``.
        """
        frame = tk.Frame(self, width=200)
        if isinstance(value, int):
            var = tk.IntVar(frame, value=value)
            labelformat = "{name}: {value}"
        else:
            var = tk.DoubleVar(frame, value=value)
            labelformat = "{name}: {value:.2f}"

        def render() -> str:
            return labelformat.format(name=name, value=var.get())

        # Use the same format for the initial text as for later updates.
        label = tk.Label(frame, text=render())
        slider = ttk.Scale(
            frame,
            variable=var,
            from_=min_value,
            to=max_value,
            length=200,
            orient=tk.HORIZONTAL,
        )
        # Refresh the label whenever the variable is written.
        var.trace_add("write", lambda v, index, mode: label.configure(text=render()))
        label.pack()
        slider.pack()
        frame.pack(side=tk.RIGHT, padx=10)

        param_id = len(self._params)
        self._params[param_id] = Parameter(var, slider)
        return param_id

    def get(self, param_id: int) -> int | float:
        """Return the current value of the parameter."""
        return self._params[param_id].var.get()

    def set(self, param_id: int, value: int | float):
        """Set the value of the parameter."""
        self._params[param_id].var.set(value)

    def get_slider(self, param_id: int) -> ttk.Scale:
        """Return the slider widget of the parameter."""
        return self._params[param_id].slider

    def set_state(self, state: str, param_id: int | None = None):
        """Set the widget state of one slider, or of all sliders when ``param_id`` is None."""
        if param_id is not None:
            self._params[param_id].slider.configure(state=state)
            return
        for param in self._params.values():
            param.slider.configure(state=state)
if __name__ == "__main__":
    # Manual demo: one integer and one float parameter in a bare window.
    root = tk.Tk()
    tuner = ParameterTuner(root)
    tuner.add("Frameabstand", 30, 1, 60)
    tuner.add("Schwellenwert", 0.2, 0, 5)
    tuner.pack()
    root.mainloop()

151
app/preview.py Executable file
View File

@ -0,0 +1,151 @@
import tkinter as tk
from dataclasses import dataclass
import ttkbootstrap as ttk
from PIL import Image, ImageTk
from ttkbootstrap.scrolled import ScrolledFrame
class Slide(tk.Frame):
    """One slide in the preview: a thumbnail that can be selected, plus a delete button."""

    def __init__(self, preview, image: Image.Image):
        """Create the slide inside ``preview``.

        Args:
            preview: Owning preview; it must provide ``_frame`` (the parent widget) and
                the methods ``notify_selection``, ``notify_unselected`` and ``remove``.
            image: Full-size image; kept unchanged, thumbnails are derived from it.
        """
        super().__init__(preview._frame)
        self.preview = preview
        self.image = image
        # Keep a reference to the PhotoImage so it is not garbage collected.
        self.imagetk = ImageTk.PhotoImage(image)
        self.label = tk.Label(self, image=self.imagetk, borderwidth=1, relief=tk.SOLID)
        self.label.pack(fill=tk.X, pady=(0, 5))
        self.delbutton = ttk.Button(self, text="Löschen", bootstyle="danger-outline", command=self._on_delete)
        self.delbutton.pack(fill=tk.X)
        self._is_selected = False
        self.highlight = None
        # Id of the pending delayed resize, or None when nothing is scheduled.
        self._configure_id = None
        self.bind("<Configure>", self._on_resize)
        self.label.bind("<Button-1>", self._on_select)

    def destroy(self):
        """Cancel a pending delayed resize, then destroy the widget."""
        # Otherwise the callback would run against an already destroyed widget.
        if self._configure_id is not None:
            self.after_cancel(self._configure_id)
            self._configure_id = None
        super().destroy()

    def _on_resize(self, e: tk.Event):
        """Schedule a thumbnail rebuild 100 ms after the most recent size change."""
        if self._configure_id is not None:
            self.after_cancel(self._configure_id)
        self._configure_id = self.after(100, self._on_final_resize)

    def _on_select(self, e: tk.Event):
        """Toggle the selection of this slide and inform the preview."""
        if self._is_selected:
            self._on_unselect()
            # Tell the preview as well, so it does not keep a stale selected index.
            self.preview.notify_unselected(self)
            return
        self._is_selected = True
        self.highlight = tk.Label(self, text="Ausgewählt", bg="green")
        self.highlight.place(x=0, y=0)
        self.preview.notify_selection(self)

    def _on_unselect(self):
        """Clear the selection marker without notifying the preview."""
        self._is_selected = False
        if self.highlight is not None:
            self.highlight.destroy()
            self.highlight = None

    def _on_final_resize(self):
        """Rebuild the thumbnail so that it fits the current widget width."""
        self._configure_id = None
        thumbnail = self.image.copy()
        thumbnail.thumbnail(size=(self.winfo_width(), self.image.height), resample=Image.NEAREST)
        self.imagetk = ImageTk.PhotoImage(thumbnail)
        self.label.configure(image=self.imagetk)

    def _on_delete(self):
        """Ask the preview to remove this slide."""
        self.preview.remove(self)
class SlidePreview(ScrolledFrame):
    """Scrollable grid of slides with at most one selected slide."""

    def __init__(self, parent, *args, columns: int = 4, **kwargs):
        """Create an empty preview.

        Args:
            parent: Parent widget.
            columns: Number of slides per grid row.
            *args, **kwargs: Forwarded to ``ScrolledFrame``.
        """
        super().__init__(parent, *args, **kwargs)
        self._frame = tk.Frame(self)
        self._frame.pack(fill=tk.BOTH, expand=True)
        self.columns = columns
        for column in range(columns):
            self._frame.grid_columnconfigure(column, weight=1)
        # Index of the selected slide in ``_slides``, or -1 when nothing is selected.
        self._selected_index = -1
        self._slides = []

    def _place(self, slide: Slide, index: int):
        """Put ``slide`` into the grid cell that belongs to list position ``index``."""
        row, column = divmod(index, self.columns)
        slide.grid(row=row, column=column, sticky=tk.NSEW, padx=10, pady=10)

    def _on_resize(self, e: tk.Event):
        """Re-grid the first slide; does nothing while the preview is empty."""
        if not self._slides:
            return
        slide = self._slides[0]
        slide.grid_forget()
        slide.grid(row=0, column=0, sticky=tk.EW)

    def notify_selection(self, selected: Slide):
        """Record ``selected`` as the selected slide and unselect all others."""
        self._selected_index = self._slides.index(selected)
        for slide in self._slides:
            if slide is not selected:
                slide._on_unselect()

    def notify_unselected(self, unselected: Slide):
        """Forget the selection if ``unselected`` is the currently selected slide."""
        if self._slides.index(unselected) == self._selected_index:
            self._selected_index = -1

    def add(self, slide: Image.Image):
        """Append a slide showing the given image."""
        index = len(self._slides)
        s = Slide(self, slide)
        self._place(s, index)
        self._slides.append(s)

    def insert(self, slide: Image.Image, index: int):
        """Insert a slide showing the given image at list position ``index``.

        The selection keeps pointing at the same slide: if the new slide lands at or
        before the selected one, the selected index is shifted by one.
        """
        s = Slide(self, slide)
        self._slides.insert(index, s)
        # Look the position up instead of trusting ``index``; ``list.insert`` clamps
        # out-of-range values.
        position = self._slides.index(s)
        if self._selected_index != -1 and position <= self._selected_index:
            self._selected_index += 1
        for grid_index, existing in enumerate(self._slides):
            existing.grid_forget()
            self._place(existing, grid_index)
            existing._on_final_resize()

    def remove(self, slide: Slide):
        """Destroy ``slide`` and close the gap in the grid; unknown slides are ignored."""
        try:
            index = self._slides.index(slide)
        except ValueError:
            return
        if index == self._selected_index:
            self._selected_index = -1
        elif index < self._selected_index:
            self._selected_index -= 1
        slide.destroy()
        del self._slides[index]
        # Every slide behind the removed one moves up by one grid cell.
        for grid_index, moved in enumerate(self._slides[index:], start=index):
            moved.grid_forget()
            self._place(moved, grid_index)

    def clear(self):
        """Destroy all slides and reset the selection."""
        for slide in self._slides:
            slide.destroy()
        self._slides = []
        self._selected_index = -1

    def __len__(self):
        """Return the number of slides."""
        return len(self._slides)

    def get_images(self) -> list[Image.Image]:
        """Return the full-size images of all slides in display order."""
        return [slide.image for slide in self._slides]
if __name__ == "__main__":
    # Manual demo: show sixteen numbered PNG files from a local example directory.
    root = tk.Tk()
    root.geometry("800x400")
    preview = SlidePreview(root)
    preview.pack(fill=tk.BOTH, expand=True)
    for number in range(16):
        filename = f"{number:02}"
        preview.add(Image.open(f"machine learning in der radiologie_5ebbrbz/{filename}.png"))
    root.mainloop()

4
app/requirements.txt Executable file
View File

@ -0,0 +1,4 @@
opencv-python
ttkbootstrap
tqdm
matplotlib

View File

@ -0,0 +1,151 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"import cv2\n",
"import numpy as np\n",
"from tqdm import tqdm, trange\n",
"from PIL import Image, ImageChops"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"VIDEO_FILE = \"diagnostik_aorta_gefaesse_fall8.mp4\"\n",
"cap = cv2.VideoCapture(VIDEO_FILE)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def _load_frames_sequentially(cap: cv2.VideoCapture, frame_interval: int):\n",
" frames = []\n",
" for i in range(int(cap.get(cv2.CAP_PROP_FRAME_COUNT))):\n",
" res, frame = cap.read()\n",
" if i % frame_interval == 0 and res:\n",
" frames.append(frame)\n",
" return frames"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def _load_frames_random_access(cap: cv2.VideoCapture, frame_interval: int):\n",
" frames = []\n",
" total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))\n",
" i = 0\n",
" while i < total_frames:\n",
" cap.set(cv2.CAP_PROP_POS_FRAMES, i)\n",
" res, frame = cap.read()\n",
" if res:\n",
" frames.append(frame)\n",
" i += frame_interval\n",
" return frames"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"def _load_frames_ra_grab(cap: cv2.VideoCapture, frame_interval: int):\n",
" frames = []\n",
" total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))\n",
" i = 0\n",
" res = cap.grab()\n",
" while i < total_frames:\n",
" if res:\n",
" frames.append(cap.retrieve()[1])\n",
" for _ in range(frame_interval):\n",
" res = cap.grab()\n",
" i += frame_interval\n",
" return frames"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"def load_frames(cap: cv2.VideoCapture, frame_interval: int, method: str):\n",
" if method == \"sequentially\":\n",
" return _load_frames_sequentially(cap, frame_interval)\n",
" elif method == \"random-access\":\n",
" return _load_frames_random_access(cap, frame_interval)\n",
" elif method == \"ra-grab\":\n",
" return _load_frames_ra_grab(cap, frame_interval)\n",
" else:\n",
" raise ValueError(\"Unknown method\")"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"method='ra-grab', duration=6.1883978843688965\n",
"method='random-access', duration=14.68180513381958\n"
]
}
],
"source": [
"import time\n",
"\n",
"for method in (\"ra-grab\", \"random-access\"):\n",
" cap = cv2.VideoCapture(VIDEO_FILE)\n",
" start = time.time()\n",
" frames = load_frames(cap, 30, method)\n",
" print(f\"{method=}, duration={time.time() - start}\")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Resultat: Random Access und nur Dekodieren wenn benötigt ist am schnellsten"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because one or more lines are too long

253
experimental/slides.ipynb Executable file
View File

@ -0,0 +1,253 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
"import cv2\n",
"import numpy as np\n",
"from tqdm import tqdm\n",
"from PIL import Image, ImageChops"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"VIDEO_FILE = \"thorax_wiederholung_theorie_2_edit.mp4\""
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"cap = cv2.VideoCapture(VIDEO_FILE)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"60"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fps = int(cap.get(cv2.CAP_PROP_FPS))\n",
"fps"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 36788/36788 [03:05<00:00, 198.49it/s]\n"
]
}
],
"source": [
"frames = []\n",
"total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))\n",
"\n",
"for i in tqdm(range(total_frames)):\n",
" ret, frame = cap.read()\n",
" if i % fps == 0 and ret:\n",
" frames.append(frame)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"images = [ Image.fromarray(frame[:,:,::-1]) for frame in frames]"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [],
"source": [
"thumbnails = [ img.copy() for img in images]\n",
"for img in thumbnails:\n",
" img.thumbnail((64,64))"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"def compare(frame1, frame2):\n",
" diff = frame1 - frame2\n",
" return diff.sum() / (255 * np.prod(diff.shape))"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [],
"source": [
"def compare(image1, image2):\n",
" diff = ImageChops.difference(image1, image2)\n",
" return diff.histogram()"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"list"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(compare(thumbnails[0], thumbnails[10]))"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 613/613 [00:06<00:00, 95.81it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Found 73 slides.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"diffs = []\n",
"threshold = 0.23\n",
"\n",
"selected_frames = [frames[0]]\n",
"\n",
"compare_frame = frames[0]\n",
"for frame in tqdm(frames[1:]):\n",
" diff = compare(compare_frame, frame)\n",
" diffs.append(diff)\n",
" if diff > threshold:\n",
" compare_frame = frame\n",
" selected_frames.append(frame)\n",
"\n",
"print(f\"Found {len(selected_frames)} slides.\")"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"73"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(selected_frames)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"73it [00:24, 3.02it/s]\n"
]
}
],
"source": [
"from PIL import Image\n",
"from os import makedirs\n",
"\n",
"imgdir = VIDEO_FILE.removesuffix('.mp4')\n",
"\n",
"makedirs(imgdir, exist_ok=True)\n",
"\n",
"\n",
"for index,frame in tqdm(enumerate(selected_frames)):\n",
" im = Image.fromarray(frame[:,:,::-1], mode=\"RGB\")\n",
" im.save(f\"{imgdir}/{index:02}.png\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.1"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

22
experimental/slides.py Executable file
View File

@ -0,0 +1,22 @@
import argparse
from pathlib import Path
import cv2
if __name__ == "__main__":
    # Experiment: seek through a video in steps of one second and count the reads.
    parser = argparse.ArgumentParser()
    parser.add_argument("video", type=Path, help="Video file")
    # NOTE(review): --interval is parsed but not used below; the step is derived from
    # the frame rate instead. Confirm the intended meaning before wiring it in.
    parser.add_argument("--interval", "-i", type=int, default=1, help="Frame intervals")
    args = parser.parse_args()

    cap = cv2.VideoCapture(args.video.as_posix())
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        # A frame rate below 1 would give a step of 0 and the loop would never end.
        step = max(1, int(fps))
        frames = []
        f = 0
        while f < total_frames:
            cap.set(cv2.CAP_PROP_POS_FRAMES, f)
            # Each entry is the (success, frame) pair returned by read().
            frames.append(cap.read())
            f += step
    finally:
        cap.release()
    print(len(frames))