Integrations

ImageDescription

A client to interact with the image description API. The API is used to generate short phrases that describe an image.

Methods:

Name             Description
summarise_image  summarise_image(image: Image) -> str: Summarise the image using the LLaMA API.

Source code in video_sampler/integrations/llava_chat.py
class ImageDescription:
    """A client to interact with the image description API.
    The API is used to generate short phrases that describe an image.

    Methods:
        summarise_image(image: Image) -> str:
            Summarise the image using the LLaMA API.
    """

    def __init__(self, url: str) -> None:
        if url is None:
            url = "http://localhost:8080/"
        self.url = url

    def summarise_image(self, image: Image) -> str:
        """Summarise the image
        Args:
            image (Image): The image to summarise.
        Returns:
            str: The description of the image.
        """
        ...
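
ImageDescription is effectively an abstract base: summarise_image is declared but left unimplemented, so concrete backends subclass it. A minimal sketch of such a subclass, assuming Pillow is available; the CaptionStub name and its canned caption are purely illustrative and not part of the library:

from PIL import Image

from video_sampler.integrations.llava_chat import ImageDescription


class CaptionStub(ImageDescription):
    """Hypothetical backend that returns a fixed caption, e.g. for tests."""

    def summarise_image(self, image: Image) -> str:
        # A real backend would send the encoded image to a captioning service here.
        return "a placeholder description"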

summarise_image(image)

Summarise the image.

Args:
    image (Image): The image to summarise.

Returns:
    str: The description of the image.

Source code in video_sampler/integrations/llava_chat.py
def summarise_image(self, image: Image) -> str:
    """Summarise the image
    Args:
        image (Image): The image to summarise.
    Returns:
        str: The description of the image.
    """
    ...

ImageDescriptionDefault

Bases: ImageDescription

A client to interact with the LLaMA image description API. The API is used to generate short phrases that describe an image.

Methods:

Name             Description
summarise_image  summarise_image(image: Image) -> str: Summarise the image using the LLaMA API.

Source code in video_sampler/integrations/llava_chat.py
class ImageDescriptionDefault(ImageDescription):
    """A client to interact with the LLaMA image description API.
    The API is used to generate short phrases that describe an image.

    Methods:
        summarise_image(image: Image) -> str:
            Summarise the image using the LLaMA API.
    """

    def __init__(self, url: str = "http://localhost:8080/completion"):
        """Initialise the client with the base URL of the LLaMA API.
        Args:
            url (str): The base URL of the LLaMA API.
        """
        """TODO: migrate to OpenAI API when available"""
        super().__init__(url)
        self.headers = {
            "accept-language": "en-GB,en",
            "content-type": "application/json",
        }
        if api_key := os.getenv("OPENAI_API_KEY"):
            self.headers["Authorization"] = f"Bearer {api_key}"
        self.session = requests.Session()

    def get_prompt(self):
        return """You're an AI assistant that describes images using short phrases.
        The image is shown below.
        \nIMAGE:[img-10]
        \nASSISTANT:"""

    def summarise_image(self, image: Image) -> str:
        """Summarise the image using the LLaMA API.
        Args:
            image (Image): The image to summarise.
        Returns:
            str: The description of the image.
        """
        b64image = encode_image(resize_image(image))

        json_body = {
            "model": os.getenv("OPENAI_MODEL", "LLaVA_CPP"),
            "stream": False,
            "n_predict": 1000,
            "temperature": 0.1,
            "repeat_last_n": 78,
            "image_data": [{"data": b64image, "id": 10}],
            "cache_prompt": True,
            "top_k": 40,
            "top_p": 1,
            "min_p": 0.05,
            "tfs_z": 1,
            "typical_p": 1,
            "presence_penalty": 0,
            "frequency_penalty": 0,
            "mirostat": 0,
            "mirostat_tau": 5,
            "mirostat_eta": 0.1,
            "grammar": "",
            "n_probs": 0,
            "min_keep": 0,
            "api_key": os.getenv("OPENAI_API_KEY", ""),
            "slot_id": 0,
            "stop": ["</s>", "Llama:", "User:"],
            "prompt": self.get_prompt(),
        }
        response = self.session.post(
            f"{self.url}",
            json=json_body,
            headers=self.headers,
            stream=False,
        )
        if response.status_code != 200:
            print(f"Failed to summarise image: {response}")
            return None
        res = response.json()
        if "choices" in res:
            return res["choices"][0]["text"].strip()
        elif "content" in res:
            return res["content"].strip()
        raise ValueError(f"Failed to summarise image: unknown response format: {res}")
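
A minimal usage sketch, assuming a local llama.cpp server with a LLaVA model listening on the default /completion endpoint; the example.jpg path is only illustrative:

from PIL import Image

from video_sampler.integrations.llava_chat import ImageDescriptionDefault

client = ImageDescriptionDefault()        # defaults to http://localhost:8080/completion
frame = Image.open("example.jpg")         # any RGB frame sampled from a video
caption = client.summarise_image(frame)
print(caption)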

__init__(url='http://localhost:8080/completion')

Initialise the client with the base URL of the LLaMA API.

Args:
    url (str): The base URL of the LLaMA API.

Source code in video_sampler/integrations/llava_chat.py
def __init__(self, url: str = "http://localhost:8080/completion"):
    """Initialise the client with the base URL of the LLaMA API.
    Args:
        url (str): The base URL of the LLaMA API.
    """
    """TODO: migrate to OpenAI API when available"""
    super().__init__(url)
    self.headers = {
        "accept-language": "en-GB,en",
        "content-type": "application/json",
    }
    if api_key := os.getenv("OPENAI_API_KEY"):
        self.headers["Authorization"] = f"Bearer {api_key}"
    self.session = requests.Session()
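
The constructor also picks up OPENAI_API_KEY from the environment and forwards it as a Bearer token. A short sketch of that wiring, using a placeholder key purely for illustration:

import os

from video_sampler.integrations.llava_chat import ImageDescriptionDefault

os.environ["OPENAI_API_KEY"] = "sk-example"            # placeholder, not a real credential
client = ImageDescriptionDefault("http://localhost:8080/completion")
print(client.headers["Authorization"])                 # -> "Bearer sk-example"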

summarise_image(image)

Summarise the image using the LLaMA API.

Args:
    image (Image): The image to summarise.

Returns:
    str: The description of the image.

Source code in video_sampler/integrations/llava_chat.py
def summarise_image(self, image: Image) -> str:
    """Summarise the image using the LLaMA API.
    Args:
        image (Image): The image to summarise.
    Returns:
        str: The description of the image.
    """
    b64image = encode_image(resize_image(image))

    json_body = {
        "model": os.getenv("OPENAI_MODEL", "LLaVA_CPP"),
        "stream": False,
        "n_predict": 1000,
        "temperature": 0.1,
        "repeat_last_n": 78,
        "image_data": [{"data": b64image, "id": 10}],
        "cache_prompt": True,
        "top_k": 40,
        "top_p": 1,
        "min_p": 0.05,
        "tfs_z": 1,
        "typical_p": 1,
        "presence_penalty": 0,
        "frequency_penalty": 0,
        "mirostat": 0,
        "mirostat_tau": 5,
        "mirostat_eta": 0.1,
        "grammar": "",
        "n_probs": 0,
        "min_keep": 0,
        "api_key": os.getenv("OPENAI_API_KEY", ""),
        "slot_id": 0,
        "stop": ["</s>", "Llama:", "User:"],
        "prompt": self.get_prompt(),
    }
    response = self.session.post(
        f"{self.url}",
        json=json_body,
        headers=self.headers,
        stream=False,
    )
    if response.status_code != 200:
        print(f"Failed to summarise image: {response}")
        return None
    res = response.json()
    if "choices" in res:
        return res["choices"][0]["text"].strip()
    elif "content" in res:
        return res["content"].strip()
    raise ValueError(f"Failed to summarise image: unknown response format: {res}")

VideoSummary

A client to interact with the LLaMA video summarisation API. The API is used to generate a summary of a video based on image descriptions.

Methods:

Name             Description
summarise_video  summarise_video(image_descriptions: list[str]) -> str: Summarise the video using the LLaMA API.

Source code in video_sampler/integrations/llava_chat.py
class VideoSummary:
    """A client to interact with the LLaMA video summarisation API.
    The API is used to generate a summary of a video based on image descriptions.

    Methods:
        summarise_video(image_descriptions: list[str]) -> str:
            Summarise the video using the LLaMA API.
    """

    def __init__(self, url: str = "http://localhost:8080/v1"):
        """Initialise the client with the base URL of the LLaMA API.
        Args:
            url (str): The base URL of the LLaMA API."""
        if url is None:
            url = "http://localhost:8080/v1"
        super().__init__(url)

    def get_prompt(self):
        return """You're an AI assistant that summarises videos based on image descriptions.
        Combine image descriptions into a coherent summary of the video."""

    def summarise_video(self, image_descriptions: list[str]):
        """Summarise the video using the LLaMA API.
        Args:
            image_descriptions (list[str]): The descriptions of the images in the video.
        Returns:
            str: The summary of the video.
        """
        return self.client.chat.completions.create(
            model="LLaMA_CPP",
            messages=[
                {
                    "role": "system",
                    "content": self.get_prompt(),
                },
                {"role": "user", "content": "\n".join(image_descriptions)},
            ],
            max_tokens=300,
        )
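
summarise_video forwards the joined descriptions to an OpenAI-compatible chat completions endpoint through self.client. A rough equivalent using the openai package directly, assuming a local server exposing /v1 that ignores the API key; the descriptions below are made up:

from openai import OpenAI

descriptions = [
    "a person walks along a beach at sunset",
    "waves break against the shore",
]

client = OpenAI(base_url="http://localhost:8080/v1", api_key="sk-no-key-required")
response = client.chat.completions.create(
    model="LLaMA_CPP",
    messages=[
        {
            "role": "system",
            "content": "You're an AI assistant that summarises videos based on "
            "image descriptions. Combine image descriptions into a coherent "
            "summary of the video.",
        },
        {"role": "user", "content": "\n".join(descriptions)},
    ],
    max_tokens=300,
)
print(response.choices[0].message.content)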

__init__(url='http://localhost:8080/v1')

Initialise the client with the base URL of the LLaMA API.

Args:
    url (str): The base URL of the LLaMA API.

Source code in video_sampler/integrations/llava_chat.py
def __init__(self, url: str = "http://localhost:8080/v1"):
    """Initialise the client with the base URL of the LLaMA API.
    Args:
        url (str): The base URL of the LLaMA API."""
    if url is None:
        url = "http://localhost:8080/v1"
    super().__init__(url)

summarise_video(image_descriptions)

Summarise the video using the LLaMA API.

Args:
    image_descriptions (list[str]): The descriptions of the images in the video.

Returns:
    str: The summary of the video.

Source code in video_sampler/integrations/llava_chat.py
def summarise_video(self, image_descriptions: list[str]):
    """Summarise the video using the LLaMA API.
    Args:
        image_descriptions (list[str]): The descriptions of the images in the video.
    Returns:
        str: The summary of the video.
    """
    return self.client.chat.completions.create(
        model="LLaMA_CPP",
        messages=[
            {
                "role": "system",
                "content": self.get_prompt(),
            },
            {"role": "user", "content": "\n".join(image_descriptions)},
        ],
        max_tokens=300,
    )

encode_image(image)

Convert the image to base64

Source code in video_sampler/integrations/llava_chat.py
def encode_image(image: Image):
    """
    Convert the image to base64
    """
    # create a buffer to store the image
    buffer = io.BytesIO()
    # save the image to the buffer
    image.save(buffer, format="JPEG")
    # convert the image to base64
    return base64.b64encode(buffer.getvalue()).decode("utf-8")

resize_image(image, max_side=512)

Resize the image to max_side if any of the sides is greater than max_side. If max_side is None, the image is returned as is.

Source code in video_sampler/integrations/llava_chat.py
def resize_image(image: Image, max_side: int = 512):
    """
    Resize the image to max_side if any of the sides is greater than max_side.
    If max_side is None, the image is returned as is.
    """
    # get the image shape
    if max_side is None:
        return image
    width, height = image.size
    if max(width, height) > max_side:
        # resize the image to max_side
        # keeping the aspect ratio
        if width > height:
            new_width = max_side
            new_height = int(height * max_side / width)
        else:
            new_height = max_side
            new_width = int(width * max_side / height)
        return image.resize((new_width, new_height))
    return image
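
Together, resize_image and encode_image produce the base64 JPEG payload that summarise_image sends as image_data. A small round-trip sketch, assuming Pillow is installed; the synthetic 1920x1080 frame stands in for a sampled video frame:

from PIL import Image

from video_sampler.integrations.llava_chat import encode_image, resize_image

frame = Image.new("RGB", (1920, 1080))     # stand-in for a sampled video frame
small = resize_image(frame, max_side=512)  # 1920x1080 -> 512x288, aspect ratio preserved
payload = encode_image(small)              # base64-encoded JPEG string
print(small.size, len(payload))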