  1. Import the required modules.
from caffe2.python import core, brew, workspace, model_helper
from caffe2.proto import caffe2_pb2
from caffe2.python.predictor import mobile_exporter
from caffe2.python.optimizer import (build_sgd, build_adagrad, build_adam)
import time
import itertools
import numpy as np

core.GlobalInit(['caffe2', '--caffe2_log_level=0'])
  2. Create the input and label.
def AddInput(model, batch_size, db, db_type):
    data, label = brew.db_input(model, ["data", "label"],  # output blob names
                                batch_size=batch_size, db=db,
                                db_type=db_type)
    return data, label
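
brew.db_input expects the database to exist already. If you need to build a minidb from NumPy arrays first, the helper below is a minimal sketch (the write_db name, file name, and array names are placeholders) using Caffe2's TensorProtos record format:

from caffe2.python import utils

def write_db(db_type, db_name, features, labels):
    # Each record is a serialized TensorProtos holding one (feature, label) pair.
    db = core.C.create_db(db_type, db_name, core.C.Mode.write)
    transaction = db.new_transaction()
    for i in range(features.shape[0]):
        tensor_protos = caffe2_pb2.TensorProtos()
        tensor_protos.protos.extend([
            utils.NumpyArrayToCaffe2Tensor(features[i]),
            utils.NumpyArrayToCaffe2Tensor(labels[i]),
        ])
        transaction.put('%08d' % i, tensor_protos.SerializeToString())
    # Deleting the transaction and db handles flushes the records to disk.
    del transaction
    del db

# e.g. write_db('minidb', 'train.minidb', X.astype(np.float32), y.astype(np.int32))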
  3. Build the model structure.
def add_mlp_model(model, data, device_opts, sizes=[20, 30, 10]):
    with core.DeviceScope(device_opts):
        layer = data
        for i in range(len(sizes) - 2):
            layer = brew.fc(model, layer, "dense_{}".format(i),
                            dim_in=sizes[i], dim_out=sizes[i + 1],
                            weight_init=('XavierFill', {}),
                            bias_init=('ConstantFill', {}))
            layer = brew.relu(model, layer, "relu_{}".format(i))
        layer = brew.fc(model, layer, "dense_{}".format(len(sizes) - 1),
                        dim_in=sizes[len(sizes) - 2],
                        dim_out=sizes[len(sizes) - 1],
                        weight_init=('XavierFill', {}),
                        bias_init=('ConstantFill', {}))
        softmax = brew.softmax(model, layer, "softmax")
    return softmax
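
As a quick sanity check, the MLP can be built and run once on random data (a hedged sketch; the CPU device, batch size 4, and default sizes here are illustrative):

smoke_model = model_helper.ModelHelper(name="mlp_smoke_test")
cpu_opts = core.DeviceOption(caffe2_pb2.CPU, 0)
workspace.FeedBlob("data", np.random.rand(4, 20).astype(np.float32))
add_mlp_model(smoke_model, "data", cpu_opts, sizes=[20, 30, 10])
workspace.RunNetOnce(smoke_model.param_init_net)
workspace.RunNetOnce(smoke_model.net)
print(workspace.FetchBlob("softmax").shape)  # (4, 10)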

def add_accuracy_and_xent(model, softmax, label):
    accuracy = brew.accuracy(model, [softmax, label], 'accuracy')
    xent = model.LabelCrossEntropy([softmax, label], 'xent')
    return accuracy, xent
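
LabelCrossEntropy produces the negative log-probability of the true class for each example, and AveragedLoss (added in the next step) averages it over the batch. An illustrative NumPy check, assuming the nets have already been run so these blobs exist:

probs = workspace.FetchBlob('softmax')   # (batch_size, num_classes)
labels = workspace.FetchBlob('label')    # (batch_size,)
xent_np = -np.log(probs[np.arange(labels.shape[0]), labels])
print(np.allclose(xent_np.mean(), workspace.FetchBlob('loss')))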

def add_training_operators(model, softmax, label, opt_type='adam', lr=0.0001):
    _, xent = add_accuracy_and_xent(model, softmax, label)
    loss = model.AveragedLoss(xent, 'loss')
    model.AddGradientOperators([loss])
    if opt_type == 'adam':
        build_adam(model, base_learning_rate=lr)
    elif opt_type == 'sgd':
        build_sgd(model, base_learning_rate=lr, gamma=0.9,
                  policy='step', stepsize=1)
    elif opt_type == 'adagrad':
        build_adagrad(model, base_learning_rate=lr)
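
Note: with policy='step', Caffe2 decays the learning rate as base_lr * gamma^(iter // stepsize), so stepsize=1 as above shrinks the rate by gamma on every single iteration; for long runs you will usually want a much larger stepsize.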

def build_model(valid_file, batch_size, db_type, sizes, device_opts, gpu_id, lr, grad_type='sgd'):
    # Training model
    train_model = model_helper.ModelHelper(name="lineimage_feature_train")
    train_model.net.RunAllOnGPU(gpu_id=gpu_id, use_cudnn=True)
    train_model.param_init_net.RunAllOnGPU(gpu_id=gpu_id, use_cudnn=True)
    softmax = add_mlp_model(train_model, "data", sizes=sizes, device_opts=device_opts)
    add_training_operators(train_model, softmax, "label", grad_type, lr)

    # Validation model
    valid_model = model_helper.ModelHelper(name="lineimage_feature_valid", init_params=False)
    valid_model.net.RunAllOnGPU(gpu_id=gpu_id, use_cudnn=True)
    valid_model.param_init_net.RunAllOnGPU(gpu_id=gpu_id, use_cudnn=True)
    data, label = AddInput(valid_model, batch_size=batch_size, db=valid_file, db_type=db_type)
    softmax = add_mlp_model(valid_model, data, sizes=sizes, device_opts=device_opts)
    add_accuracy_and_xent(valid_model, softmax, label)

    return train_model, valid_model
  4. Start training the model. You need to create 'data' and 'label' blobs in the current workspace before the training net can be instantiated.
def train(train_file, train_counts, valid_file, valid_counts, lr, dumped_file,
          gpu_id, sizes, max_epoch=10, batch_size=100,
          db_type='minidb', grad_type='sgd'):
    # Build a separate reader net that pulls (data, label) batches from the training DB
    device_opts = core.DeviceOption(caffe2_pb2.CUDA, gpu_id)
    data_reader = model_helper.ModelHelper(name='minidb_reader')
    data_reader.TensorProtosDBInput([], ['data', 'label'],
                                    batch_size=batch_size,
                                    db=train_file, db_type=db_type)
    workspace.RunNetOnce(data_reader.param_init_net)
    workspace.CreateNet(data_reader.net)

    train_model, valid_model = build_model(valid_file, batch_size=batch_size,
                                           db_type=db_type, sizes=sizes, lr=lr,
                                           device_opts=device_opts, gpu_id=gpu_id,
                                           grad_type=grad_type)
    workspace.RunNetOnce(train_model.param_init_net)
    workspace.RunNetOnce(valid_model.param_init_net)
    # The training net reads 'data' and 'label' as external inputs, so feed
    # placeholder blobs of the right dtype before CreateNet; the real batches
    # are fed in each iteration of the loop below.
    workspace.FeedBlob('data', np.zeros((batch_size, sizes[0]), dtype=np.float32),
                       device_option=device_opts)
    workspace.FeedBlob('label', np.zeros((batch_size,), dtype=np.int32),
                       device_option=device_opts)
    # Instantiate the networks
    workspace.CreateNet(train_model.net)
    workspace.CreateNet(valid_model.net)
    print('max epoch:', max_epoch)
    for epoch in range(max_epoch):
        max_iters = int(train_counts / batch_size)
        trainloss = 0
        trainacc = 0
        start = time.time()
        for i in range(max_iters):
            workspace.RunNet(data_reader.net.Proto().name)
            X = workspace.FetchBlob("data")
            Y = workspace.FetchBlob("label")
            workspace.FeedBlob('data', X, device_option=device_opts)
            workspace.FeedBlob('label', Y, device_option=device_opts)
            workspace.RunNet(train_model.net.Proto().name)
            trainloss += workspace.FetchBlob('loss')
            trainacc += workspace.FetchBlob('accuracy')

        print "Epoch: %d, train acc: %.4f%%, train loss: %.4f" %
          (epoch, float(trainacc * 100. / max_iters), float(trainloss / max_iters)),
        timed = time.time() - start

        max_iters = int(valid_counts / batch_size)
        validacc = 0
        validxent = []
        for i in range(max_iters):
            workspace.RunNet(valid_model.net.Proto().name)
            validacc += workspace.FetchBlob('accuracy')
            validxent.append(workspace.FetchBlob('xent'))

        validacc = float(validacc * 100. / max_iters)
        validxent = np.mean(np.asarray(validxent).flatten())
        print "valid acc: %.4f%%, valid loss: %.6f" % (validacc, validxent),
        print "Time:", timed

    with open(".".join([dumped_file, '.train_net.pbtxt']), 'w')as fid:
        fid.write(str(train_model.Proto()))
    with open(".".join([dumped_file, '.train_init_net.pbtxt']), 'w')as fid:
        fid.write(str(train_model.param_init_net.Proto()))
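
The mobile_exporter imported at the top can turn the trained weights into a self-contained deploy pair (an init_net holding the parameters and a predict_net holding the graph). A minimal sketch, assuming it runs right after train() in the same workspace and that sizes and device_opts match the values used there:

deploy_model = model_helper.ModelHelper(name="lineimage_feature_deploy",
                                        init_params=False)
add_mlp_model(deploy_model, "data", device_opts, sizes=sizes)
# Export bundles the current parameter values from the workspace.
init_net, predict_net = mobile_exporter.Export(
    workspace, deploy_model.net, deploy_model.params)
with open('init_net.pb', 'wb') as f:
    f.write(init_net.SerializeToString())
with open('predict_net.pb', 'wb') as f:
    f.write(predict_net.SerializeToString())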
