!pip install semilearn
from google.colab import drive drive.mount('/content/drive') import os import glob import numpy as np from torchvision import transforms from semilearn import get_data_loader, get_net_builder, get_algorithm, get_config, Trainer from semilearn import split_ssl_data, BasicDataset from PIL import Image import torchvision
# ---------------------------------------------------------------------------
# Build the training configuration and convert it into the namespace-style
# object that the semilearn components expect.
# ---------------------------------------------------------------------------
config_dict = {
    # --- algorithm / model -------------------------------------------------
    'algorithm': 'fixmatch',          # which SSL algorithm to run
    'net': 'vit_tiny_patch2_32',      # backbone architecture
    'use_pretrain': True,             # start from pre-trained weights
    # pretrained checkpoint provided by the USB release
    'pretrain_path': 'https://github.com/microsoft/Semi-supervised-learning/releases/download/v.0.0.0/vit_tiny_patch2_32_mlp_im_1k_32.pth',

    # --- optimization ------------------------------------------------------
    'epoch': 100,                     # set to 100
    'num_train_iter': 102400,         # set to 102400
    'num_eval_iter': 1024,            # set to 1024
    'num_log_iter': 256,              # set to 256
    'optim': 'AdamW',                 # AdamW optimizer
    'lr': 5e-4,                       # learning rate
    'layer_decay': 0.5,               # layer-wise learning-rate decay
    'batch_size': 1,                  # 16,
    'eval_batch_size': 1,             # 16,

    # --- dataset -----------------------------------------------------------
    'dataset': 'mnist',               # default dataset config; ignored for custom data
    'num_labels': 1,                  # 40, # ignored if already split in custom data
    'num_classes': 2,                 # 10,
    'img_size': 32,
    'crop_ratio': 0.875,
    'data_dir': './data',

    # --- algorithm-specific ------------------------------------------------
    'hard_label': True,
    'uratio': 2,
    'ulb_loss_ratio': 1.0,

    # --- device ------------------------------------------------------------
    'gpu': 0,
    'world_size': 1,
    'num_workers': 1,                 # 2,
    'distributed': False,
}
config = get_config(config_dict)
# ---------------------------------------------------------------------------
# Instantiate the algorithm object.  The network builder is resolved from the
# architecture name stored in the config; no tensorboard log or external
# logger is attached.
# ---------------------------------------------------------------------------
net_builder = get_net_builder(config.net, from_name=False)
algorithm = get_algorithm(config, net_builder, tb_log=None, logger=None)
# ---------------------------------------------------------------------------
# Step 2: load the custom labelled / unlabelled training data.
# ---------------------------------------------------------------------------
data_dir = '/content/drive/MyDrive/Workspace/Semi-supervised-learning/data/custom_data'


def _load_rgb(path, size=None):
    """Open an image, force 3-channel RGB, optionally resize, return ndarray.

    The .convert('RGB') guards against greyscale or RGBA PNGs, which would
    otherwise break the 3-channel Normalize in the transforms below.
    """
    img = Image.open(path).convert('RGB')
    if size is not None:
        img = img.resize(size)
    return np.array(img)


# Labelled set: ImageFolder yields (path, class_index) pairs in .samples.
lb_folder = torchvision.datasets.ImageFolder(os.path.join(data_dir, 'train'))
# NOTE(review): unlike the eval set below, train images are NOT resized to
# 32x32 here — confirm the source images already match config.img_size.
lb_data = [_load_rgb(path) for path, _ in lb_folder.samples]
lb_target = lb_folder.targets

# Unlabelled set: a flat directory of PNGs.  sorted() makes the sample order
# deterministic across runs and filesystems (glob order is not guaranteed).
ulb_data_root_dir = '/content/drive/MyDrive/Workspace/Semi-supervised-learning/data/custom_data_unlabel'
ulb_paths = sorted(glob.glob(os.path.join(ulb_data_root_dir, '*.png')))
ulb_data = [_load_rgb(path) for path in ulb_paths]

# Weak augmentation: flip + reflect-padded random crop, then normalise.
train_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=int(32 * 0.125), padding_mode='reflect'),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
])
# NOTE(review): the "strong" transform is identical to the weak one — FixMatch
# normally uses RandAugment-style strong augmentation here; confirm intent.
train_strong_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=int(32 * 0.125), padding_mode='reflect'),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
])

# Wrap the arrays in semilearn datasets; the unlabelled set carries no targets
# and additionally receives the strong-augmentation branch.
lb_dataset = BasicDataset(config.algorithm, lb_data, lb_target,
                          config.num_classes, train_transform, is_ulb=False)
ulb_dataset = BasicDataset(config.algorithm, ulb_data, None,
                           config.num_classes, train_transform, is_ulb=True,
                           strong_transform=train_strong_transform)
# ---------------------------------------------------------------------------
# Step 3: evaluation data, data loaders, and the training run.
# ---------------------------------------------------------------------------
# Validation set (renamed from the misleading "ulb_da" — this is eval data,
# not unlabelled data).  Images are forced to RGB so a greyscale/RGBA PNG
# cannot break the 3-channel Normalize, and resized to the model input size.
eval_folder = torchvision.datasets.ImageFolder(os.path.join(data_dir, 'val'))
eval_data = [
    np.array(Image.open(path).convert('RGB').resize((32, 32)))
    for path, _ in eval_folder.samples
]
eval_target = eval_folder.targets

# Deterministic eval pipeline: resize + normalise only (no augmentation).
eval_transform = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
])
eval_dataset = BasicDataset(config.algorithm, eval_data, eval_target,
                            config.num_classes, eval_transform, is_ulb=False)

# Data loaders: the unlabelled loader uses uratio x the labelled batch size,
# as FixMatch consumes more unlabelled than labelled samples per step.
train_lb_loader = get_data_loader(config, lb_dataset, config.batch_size)
train_ulb_loader = get_data_loader(config, ulb_dataset,
                                   int(config.batch_size * config.uratio))
eval_loader = get_data_loader(config, eval_dataset, config.eval_batch_size)

# Train, then report final metrics on the held-out set.
trainer = Trainer(config, algorithm)
trainer.fit(train_lb_loader, train_ulb_loader, eval_loader)
trainer.evaluate(eval_loader)