Skip to main content

Beginner's tutorial on OpenCV using Python

OpenCV is an open-source, cross-platform computer vision library with interfaces for several languages, including C++, Java and Python. In this post, we are going to look at some of the basic functionality offered by OpenCV.

Let's start with some basics about images. In simple terms, an image can be represented as a 2-D matrix of pixels. For example, to represent the digit 1 drawn in black on a white background, every pixel that is part of the black digit can be stored as 0 and every other pixel as 1. This is known as a binary image, or 2-level image.

Gray Scale Image : An 8-bit grayscale image has 2^8 = 256 levels, ranging from black (0) to white (255). In a grayscale image, the 2-dimensional matrix holds values between 0 and 255.

Colored Image : A colored image is made up of three grayscale images (channels), representing the intensity of the Red, Green and Blue components.

Installing OpenCV :
We can install OpenCV using pip install opencv-python

Reading Images :

import cv2
import numpy as np

# Reading Images

# "imread" loads the image stored at the given path
img = cv2.imread("opencv_resources/image1.png")

# "imshow" displays the image in a window identified by its title.
# "waitKey" keeps the window open for the given number of milliseconds
# (5000 here, i.e. 5 seconds); passing 0 would wait indefinitely.
window_title = "My Image"
cv2.imshow(window_title, img)
cv2.waitKey(5000)

# Workaround for Jupyter Notebook to close the image:
# destroy the window, then call waitKey a few more times
cv2.destroyWindow(window_title)
for _ in range(4):
    cv2.waitKey(1)

Reading Videos :

fwidth = 640
fheight = 480

# Create a video-capture object and read frames in a while loop.
# Each frame that is read successfully is resized and displayed.
# The loop ends when `q` is pressed, or as soon as a frame cannot be read
# (end of the video, or the file could not be opened). Without that second
# exit the loop would spin forever once the video is exhausted, and could not
# even be stopped with `q` if no window had been shown yet.
video = cv2.VideoCapture("opencv_resources/video.mp4")
try:
    while True:
        success, img = video.read()
        if not success:
            break
        img = cv2.resize(img, (fwidth, fheight))
        cv2.imshow("Result", img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture, even if the loop is interrupted
    video.release()

    # Workaround for Jupyter Notebook to close the window
    cv2.destroyAllWindows()
    for _ in range(4):
        cv2.waitKey(1)

Reading Webcam :

fwidth = 640
fheight = 480

# Create a video-capture object and read frames in a while loop.
# Use 0 for reading the default web-cam.
video = cv2.VideoCapture(0)

# Request the capture size and brightness. The named constants replace the
# raw property ids 3, 4 and 10, and the size now uses the variables that are
# actually defined above (fwidth / fheight).
video.set(cv2.CAP_PROP_FRAME_WIDTH, fwidth)
video.set(cv2.CAP_PROP_FRAME_HEIGHT, fheight)
video.set(cv2.CAP_PROP_BRIGHTNESS, 150)

# Each frame that is read successfully is resized and displayed.
# The loop ends when `q` is pressed, or as soon as a frame cannot be read
# (e.g. the camera could not be opened), so it can never spin forever
# without a window to receive the key press.
try:
    while True:
        success, img = video.read()
        if not success:
            break
        img = cv2.resize(img, (fwidth, fheight))
        cv2.imshow("Result", img)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera so other programs can use it
    video.release()

    # Workaround for Jupyter Notebook to close the window
    cv2.destroyAllWindows()
    for _ in range(4):
        cv2.waitKey(1)

Basic Image operations :

img = cv2.imread("opencv_resources/image1.png")

# 5x5 kernel of ones, shared by the dilate and erode steps below
k = np.ones((5, 5), dtype=np.uint8)

# Gray scale version of the image
imgGrayScale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Gaussian blur (9x9 kernel) applied to the gray scale image
imgBlur = cv2.GaussianBlur(imgGrayScale, (9, 9), 0)

# Canny edge detection with both thresholds set to 200.
# Note that it runs on the original image, not on the blurred one.
imgCanny = cv2.Canny(img, 200, 200)

# Dilation makes the detected edges thicker
imgDialation = cv2.dilate(imgCanny, k, iterations=1)

# Erosion makes the dilated edges thinner again
imgEroded = cv2.erode(imgDialation, k, iterations=1)

# Show every intermediate result in its own window, then wait for a key press
results = {
    "Gray": imgGrayScale,
    "Blurred": imgBlur,
    "Canny Image": imgCanny,
    "Dialation Image": imgDialation,
    "Eroded Image": imgEroded,
}
for title, image in results.items():
    cv2.imshow(title, image)
cv2.waitKey(0)

# Workaround for Jupyter Notebook to close the windows
cv2.destroyAllWindows()
for _ in range(4):
    cv2.waitKey(1)

Resizing Images :

The image origin (0,0) in OpenCV is at the top-left corner, and the directions of the X-axis and Y-axis are shown below. Notice that the positive Y-axis points towards the bottom.

(0,0) -----------------------------> X-axis (+)

| Y-axis (+)

# Image size operations

# Load the image and print its current shape
img = cv2.imread("opencv_resources/car.png")

# The shape is reported as (height, width, number of channels),
# e.g. (460, 600, 3).
# 3 channels means a colour image (OpenCV orders the channels as BGR).
print("Shape for original image is ", img.shape)


# Resize the image.
# Note that the target size is given as (new width, new height).
new_size = (300, 200)
img1 = cv2.resize(img, new_size)
print("Shape for resized image is ", img1.shape)


# Crop the image with matrix slicing (the origin is at the top-left).
# The first index selects rows (height), the second selects columns (width).
img2 = img[:200, :300]


# Display the original, resized and cropped images, then wait for a key press
for title, image in (("Image1", img), ("Image2", img1), ("Image3", img2)):
    cv2.imshow(title, image)
cv2.waitKey(0)

# Workaround for Jupyter Notebook to close the images
cv2.destroyAllWindows()
for _ in range(4):
    cv2.waitKey(1)

Creating new images :

# Create a new single-channel (grayscale) image filled with zeros

img1 = np.zeros((600, 600))

# Create two new three-channel (colored) images filled with zeros

img2 = np.zeros((600, 600, 3), dtype=np.uint8)
img3 = np.zeros_like(img2)

# Fill the whole of img2 with orange.
# Orange is (255,165,0) in RGB; OpenCV expects BGR, i.e. (0,165,255).
orange_bgr = (0, 165, 255)
img2[:] = orange_bgr

# Second color used for the shapes below. OpenCV reads this tuple as BGR,
# so it is not the same color as orange_bgr.
shape_color = (255, 165, 0)

# Draw a line
# args - image, start point, end point, color, thickness
cv2.line(img3, (0, 0), (400, 400), orange_bgr, 3)

# Draw a rectangle outline
# args - image, upper left corner, lower right corner, color, thickness
top_left, bottom_right = (0, 0), (200, 200)
cv2.rectangle(img3, top_left, bottom_right, shape_color, 2)

# Draw the same rectangle again, filled this time, by passing cv2.FILLED
# in place of the thickness
cv2.rectangle(img3, top_left, bottom_right, shape_color, cv2.FILLED)

# Draw a circle
# args - image, center point, radius, color, thickness
cv2.circle(img3, (400, 400), 50, (255, 0, 255), 3)

# Add a text
# args - image, text, co-ordinate where the text starts, font, scale, color, thickness
cv2.putText(img3, "Hello World", (200, 200), cv2.FONT_HERSHEY_PLAIN, 1, shape_color, 1)


# Display the three images, then wait for a key press

for title, image in (("Image1", img1), ("Image2", img2), ("Image3", img3)):
    cv2.imshow(title, image)

cv2.waitKey(0)

# Workaround for Jupyter Notebook to close the images
cv2.destroyAllWindows()
for _ in range(4):
    cv2.waitKey(1)

Comments