Skip to content

Visualisation

build_feature_model(model_str)

Build a feature extraction model.

Parameters:

Name Type Description Default
model_str str

Model name.

required

Returns:

Name Type Description
tuple

Tuple of (model, extractor).

Source code in video_sampler/visualisation/clustering.py
15
16
17
18
19
20
21
22
23
24
25
26
def build_feature_model(model_str: str):
    """Load a pretrained model and its matching feature extractor.

    The same identifier is forwarded to both ``from_pretrained`` calls, so
    the extractor and the model always come from the same checkpoint name.

    Args:
        model_str (str): Model name.

    Returns:
        tuple: Tuple of (model, extractor).
    """
    # The extractor is loaded first, then the model.
    feature_extractor = AutoFeatureExtractor.from_pretrained(model_str)
    backbone = ResNetModel.from_pretrained(model_str)
    return backbone, feature_extractor

cluster_features(features, max_clusters=50)

Cluster features using t-SNE and KMeans

Parameters:

Name Type Description Default
features dict

dict with keys "embeds" and "paths"

required
max_clusters int

maximum number of clusters

50
Returns:

tuple: of (X, cluster_labels)

Source code in video_sampler/visualisation/clustering.py
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
def cluster_features(
    features,
    max_clusters=50,
):
    """Project features to 2D with t-SNE and cluster them with KMeans.

    KMeans is fitted on the original embeddings, not on the t-SNE
    projection; the projection is returned alongside the labels.

    Args:
        features (dict): dict with keys "embeds" and "paths". Only "embeds"
            is read by this function.
        max_clusters (int): maximum number of clusters

    Returns:
        tuple: of (X, cluster_labels), where X is the 2D t-SNE projection of
            the embeddings and cluster_labels are the KMeans labels.
    """
    Xorg = np.asarray(features["embeds"])
    n_samples = len(Xorg)

    # t-SNE requires perplexity to be strictly smaller than the number of
    # samples, so cap the default of 35 for small inputs.
    perplexity = min(35, max(n_samples - 1, 1))
    proj = TSNE(n_components=2, perplexity=perplexity, metric="cosine")
    X = proj.fit_transform(Xorg)

    # take about 10% of the frames as the number of clusters, but never fewer
    # than one: int(0.1 * n) is 0 for fewer than 10 samples, which KMeans
    # rejects.
    n_clusters = max(1, min(int(0.1 * n_samples), max_clusters))
    cluster_model = KMeans(n_clusters=n_clusters, random_state=0).fit(Xorg)
    return X, cluster_model.labels_

extract_features(model_str, image_folder, mkey='pixel_values', batch_size=8)

Extract features from a folder of images.

Parameters:

Name Type Description Default
model_str str

Model name.

required
image_folder Path

Folder with images.

required
mkey str

Key for the pixel values. Defaults to "pixel_values".

'pixel_values'
batch_size int

Batch size. Defaults to 8.

8

Returns:

Name Type Description
dict

Dictionary with keys "embeds" and "paths".

Source code in video_sampler/visualisation/clustering.py
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
def extract_features(
    model_str: str, image_folder: Path, mkey="pixel_values", batch_size: int = 8
):
    """Extract features from a folder of images.

    Every regular file directly inside ``image_folder`` is opened as an
    image, converted to RGB, run through the model in batches, and its pooled
    output is L2-normalised along the feature dimension.

    Args:
        model_str (str): Model name.
        image_folder (Path): Folder with images.
        mkey (str, optional): Key for the pixel values. Defaults to "pixel_values".
        batch_size (int, optional): Batch size. Defaults to 8.

    Returns:
        dict: Dictionary with keys "embeds" and "paths". "embeds" holds one
            normalised feature tensor per image and "paths" the matching
            file names, in the same order.
    """

    out_features = defaultdict(list)
    model, extractor = build_feature_model(model_str)

    # Sort for a reproducible output order (iterdir order is arbitrary) and
    # skip sub-directories, which cannot be opened as images.
    all_files = sorted(p for p in image_folder.iterdir() if p.is_file())
    # Ceiling division so a trailing partial batch is counted in the
    # progress bar total.
    n_batches = (len(all_files) + batch_size - 1) // batch_size

    with torch.no_grad():
        for batch in tqdm(batched(all_files, batch_size), total=n_batches):
            # load images; the context manager releases the file handle once
            # the RGB copy has been made
            batch_imgs = []
            for img_path in batch:
                with Image.open(img_path) as img:
                    batch_imgs.append(img.convert("RGB"))
            # extract features
            pixel_values = extractor(batch_imgs, return_tensors="pt")[mkey]
            # Flatten everything after the batch dimension. Unlike a bare
            # squeeze(), this keeps the batch dimension for single-image
            # batches, so no special case is needed.
            batch_features = model(pixel_values).pooler_output.flatten(start_dim=1)
            batch_features = torch.nn.functional.normalize(batch_features, p=2, dim=1)
            out_features["embeds"].extend(batch_features)
            out_features["paths"].extend(img_path.name for img_path in batch)
    return out_features