#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Date    : 2020-05-17
# @Author  : Shawn Shan (shansixiong@cs.uchicago.edu)
# @Link    : https://www.shawnshan.com/

"""Shared helpers: image loading, face cropping/merging, pre-processing,
model/embedding download and target selection."""

import errno
import glob
import gzip
import json
import os
import pickle
import random
import shutil
import sys
import tarfile
import zipfile

import PIL
import six
from six.moves.urllib.error import HTTPError, URLError

# Silence whatever keras writes to stderr while it is being imported.
# The try/finally guarantees stderr is restored even when the import fails,
# so the resulting traceback is not swallowed by devnull.
stderr = sys.stderr
sys.stderr = open(os.devnull, 'w')
try:
    import keras
finally:
    sys.stderr = stderr

import keras.backend as K
import numpy as np
import tensorflow as tf
from PIL import Image, ExifTags
from keras.layers import Dense, Activation
from keras.models import Model
from keras.preprocessing import image

from fawkes.align_face import align

from six.moves.urllib.request import urlopen

if sys.version_info[0] == 2:
    def urlretrieve(url, filename, reporthook=None, data=None):
        """Python 2 replacement for `urlretrieve` that streams to `filename`.

        Args:
            url: address to fetch.
            filename: local path the response body is written to.
            reporthook: optional callable invoked after every chunk with
                `(chunk_count, chunk_size, total_size)`; `total_size` is -1
                when the response carries no Content-Length header.
            data: optional request payload forwarded to `urlopen`.
        """

        def chunk_read(response, chunk_size=8192, reporthook=None):
            content_type = response.info().get('Content-Length')
            total_size = -1
            if content_type is not None:
                total_size = int(content_type.strip())
            count = 0
            while True:
                chunk = response.read(chunk_size)
                count += 1
                if reporthook is not None:
                    reporthook(count, chunk_size, total_size)
                if chunk:
                    yield chunk
                else:
                    break

        response = urlopen(url, data)
        with open(filename, 'wb') as fd:
            for chunk in chunk_read(response, reporthook=reporthook):
                fd.write(chunk)
else:
    from six.moves.urllib.request import urlretrieve

# EXIF orientation value -> counter-clockwise rotation (degrees) applied to
# bring the picture upright.
_EXIF_ROTATIONS = {3: 180, 6: 270, 8: 90}


def clip_img(X, preprocessing='raw'):
    """Clip `X` to the [0, 255] pixel range.

    `X` is mapped back to raw pixel space with `reverse_preprocess`, clipped,
    and then pre-processed again with the same `preprocessing` method.
    """
    X = reverse_preprocess(X, preprocessing)
    X = np.clip(X, 0.0, 255.0)
    X = preprocess(X, preprocessing)
    return X


def load_image(path):
    """Load `path` as an RGB array, honouring the EXIF orientation tag.

    Returns:
        The array produced by `image.img_to_array` for the RGB-converted
        image, or None when `path` is a directory, is not recognised as an
        image by PIL, or its EXIF data cannot be read (OSError).
    """
    try:
        img = Image.open(path)
    except (PIL.UnidentifiedImageError, IsADirectoryError):
        return None

    # `_getexif` is a private PIL method that not every image plugin
    # implements; treat a missing method as "no EXIF data" instead of
    # crashing with AttributeError.
    getexif = getattr(img, '_getexif', None)
    try:
        info = getexif() if getexif is not None else None
    except OSError:
        return None

    if info is not None:
        orientation_tag = next(
            (tag for tag, tag_name in ExifTags.TAGS.items() if tag_name == 'Orientation'),
            None)
        angle = _EXIF_ROTATIONS.get(dict(info).get(orientation_tag))
        if angle is not None:
            img = img.rotate(angle, expand=True)

    img = img.convert('RGB')
    image_array = image.img_to_array(img)
    return image_array


def filter_image_paths(image_paths):
    """Load every path and keep only those that `load_image` can read.

    Returns:
        `(paths, images)`: two parallel lists with the readable paths and
        their loaded arrays.
    """
    print("Identify {} files in the directory".format(len(image_paths)))
    new_image_paths = []
    new_images = []
    for p in image_paths:
        img = load_image(p)
        if img is None:
            print("{} is not an image file, skipped".format(p.split("/")[-1]))
            continue
        new_image_paths.append(p)
        new_images.append(img)
    print("Identify {} images in the directory".format(len(new_image_paths)))
    return new_image_paths, new_images


class Faces(object):
    """Faces cropped from a set of images, plus the bookkeeping needed to
    paste perturbations back onto the original images.

    The following lists are parallel, one entry per cropped face:
    `cropped_faces` (224x224 crops), `cropped_faces_shape` (height/width of
    the crop before squaring), `cropped_index` (bounding box of the crop in
    its source image) and `callback_idx` (index of the source image in
    `org_faces`).
    """

    def __init__(self, image_paths, loaded_images, aligner, verbose=1, eval_local=False, preprocessing=True):
        """Detect and crop the faces of every loaded image.

        Args:
            image_paths: paths of the images, parallel to `loaded_images`
                (only used for log messages).
            loaded_images: image arrays, e.g. as returned by
                `filter_image_paths`.
            aligner: detector object forwarded to `align`.
            verbose: when truthy, print the number of faces found per image.
            eval_local: when true, use a margin of 0 (instead of 0.7), keep
                only the first detected face of each image and resize it
                straight to 224x224 without zero-padding.
            preprocessing: when true, apply 'imagenet' pre-processing to the
                stacked crops.

        When no face is found in any image, `cropped_faces` stays an empty
        list and `cloaked_faces` / `cloaked_cropped_faces` stay None.
        """
        self.image_paths = image_paths
        self.verbose = verbose
        self.aligner = aligner
        self.org_faces = []
        self.cropped_faces = []
        self.cropped_faces_shape = []
        self.cropped_index = []
        self.callback_idx = []
        # Defined up front so the attributes exist even on the early return
        # taken when no face is detected.
        self.cloaked_cropped_faces = None
        self.cloaked_faces = None

        margin = 0 if eval_local else 0.7
        for i, cur_img in enumerate(loaded_images):
            p = image_paths[i]
            self.org_faces.append(cur_img)

            align_img = align(cur_img, self.aligner, margin=margin)
            if align_img is None:
                print("Find 0 face(s) in {}".format(p.split("/")[-1]))
                continue

            cur_faces = align_img[0]
            cur_index = align_img[1]
            if verbose:
                print("Find {} face(s) in {}".format(len(cur_faces), p.split("/")[-1]))
            if eval_local:
                # Truncate faces AND their boxes so the per-face lists below
                # stay index-aligned with `cropped_faces`.
                cur_faces = cur_faces[:1]
                cur_index = cur_index[:1]

            cur_shapes = [f.shape[:-1] for f in cur_faces]

            cur_faces_square = []
            for img in cur_faces:
                if eval_local:
                    base = resize(img, (224, 224))
                else:
                    # Pad to a square (face anchored top-left, rest zero) so
                    # the later resize does not distort the aspect ratio.
                    long_size = max([img.shape[1], img.shape[0]])
                    base = np.zeros((long_size, long_size, 3))
                    base[0:img.shape[0], 0:img.shape[1], :] = img
                cur_faces_square.append(base)

            cur_faces_square = [resize(f, (224, 224)) for f in cur_faces_square]
            self.cropped_faces_shape.extend(cur_shapes)
            self.cropped_faces.extend(cur_faces_square)
            self.cropped_index.extend(cur_index)
            self.callback_idx.extend([i] * len(cur_faces_square))

        if len(self.cropped_faces) == 0:
            return

        self.cropped_faces = np.array(self.cropped_faces)

        if preprocessing:
            self.cropped_faces = preprocess(self.cropped_faces, 'imagenet')

        # NOTE(review): np.copy over a list of differently shaped images
        # builds an object array on older NumPy and may raise on newer
        # releases - confirm which NumPy versions are supported.
        self.cloaked_faces = np.copy(self.org_faces)

    def get_faces(self):
        """Return the cropped faces (array, or empty list if none found)."""
        return self.cropped_faces

    def merge_faces(self, protected_images, original_images):
        """Paste the perturbation of every face back onto its source image.

        For face `i` the perturbation is `protected_images[i] -
        original_images[i]`, computed after both are resized back to the
        pre-resize square size and cropped to the original face shape. Both
        inputs must hold values in [0, 255] because `resize` asserts it.

        Returns:
            `self.cloaked_faces`: copies of the original images with the
            perturbations added and values clipped to [0, 255].
        """
        self.cloaked_faces = np.copy(self.org_faces)

        for i in range(len(self.cropped_faces)):
            cur_protected = protected_images[i]
            cur_original = original_images[i]

            org_shape = self.cropped_faces_shape[i]
            old_square_shape = max([org_shape[0], org_shape[1]])
            cur_protected = resize(cur_protected, (old_square_shape, old_square_shape))
            cur_original = resize(cur_original, (old_square_shape, old_square_shape))

            # Undo the zero-padding added in __init__: keep the top-left
            # region matching the original crop.
            reshape_cloak = cur_protected - cur_original
            reshape_cloak = reshape_cloak[0:org_shape[0], 0:org_shape[1], :]

            callback_id = self.callback_idx[i]
            bb = self.cropped_index[i]
            # bb[1]:bb[3] indexes rows and bb[0]:bb[2] indexes columns.
            self.cloaked_faces[callback_id][bb[1]:bb[3], bb[0]:bb[2], :] += reshape_cloak

        for i in range(0, len(self.cloaked_faces)):
            self.cloaked_faces[i] = np.clip(self.cloaked_faces[i], 0.0, 255.0)
        return self.cloaked_faces


def dump_dictionary_as_json(dict, outfile):
    """Serialise `dict` as JSON and write the encoded bytes to `outfile`."""
    j = json.dumps(dict)
    with open(outfile, "wb") as f:
        f.write(j.encode())


def load_victim_model(number_classes, teacher_model=None, end2end=False):
    """Build a softmax classifier on top of `teacher_model`.

    Args:
        number_classes: width of the new Dense output layer.
        teacher_model: keras model whose last layer output is extended.
            Despite the None default it is required: the function reads
            `teacher_model.layers`.
        end2end: value assigned to `trainable` on every teacher layer.

    Returns:
        The compiled keras model (Adadelta, categorical cross-entropy).
    """
    for l in teacher_model.layers:
        l.trainable = end2end
    x = teacher_model.layers[-1].output
    x = Dense(number_classes)(x)
    x = Activation('softmax', name="act")(x)
    model = Model(teacher_model.input, x)
    opt = keras.optimizers.Adadelta()
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
    return model


def resize(img, sz):
    """Resize `img` to `sz` and return it as an array.

    `sz` is indexed as (sz[0], sz[1]) and handed to PIL's `resize` swapped,
    i.e. as `(sz[1], sz[0])`. Values of `img` must lie in [0, 255].
    """
    assert np.min(img) >= 0 and np.max(img) <= 255.0
    im_data = image.array_to_img(img).resize((sz[1], sz[0]))
    im_data = image.img_to_array(im_data)
    return im_data


def init_gpu(gpu_index, force=False):
    """Expose the requested GPU(s) via CUDA_VISIBLE_DEVICES and open a session.

    Args:
        gpu_index: a single index, or a list of indices joined with commas.
        force: when false and CUDA_VISIBLE_DEVICES is already set to a
            non-empty value, nothing is changed and None is returned.

    Returns:
        The session created by `fix_gpu_memory`, or None (see `force`).
    """
    if isinstance(gpu_index, list):
        gpu_num = ','.join([str(i) for i in gpu_index])
    else:
        gpu_num = str(gpu_index)
    if "CUDA_VISIBLE_DEVICES" in os.environ and os.environ["CUDA_VISIBLE_DEVICES"] and not force:
        print('GPU already initiated')
        return
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_num
    sess = fix_gpu_memory()
    return sess


def fix_gpu_memory(mem_fraction=1):
    """Create a TensorFlow session and register it with keras.

    When a GPU is available the session is configured with
    `per_process_gpu_memory_fraction=mem_fraction` and `allow_growth=True`;
    otherwise the default configuration is used.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    tf_config = None
    if tf.test.is_gpu_available():
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_fraction)
        tf_config = tf.ConfigProto(gpu_options=gpu_options)
        tf_config.gpu_options.allow_growth = True
        tf_config.log_device_placement = False
    init_op = tf.global_variables_initializer()
    sess = tf.Session(config=tf_config)
    sess.run(init_op)
    K.set_session(sess)
    return sess


def preprocess(X, method):
    """Pre-process `X` with `method`.

    'raw' returns `X` unchanged and 'imagenet' applies
    `imagenet_preprocessing`. 'inception' and 'mnist' pass the assertion but
    are not implemented and raise an Exception.
    """
    assert method in {'raw', 'imagenet', 'inception', 'mnist'}

    if method == 'raw':
        pass
    elif method == 'imagenet':
        X = imagenet_preprocessing(X)
    else:
        raise Exception('unknown method %s' % method)

    return X


def reverse_preprocess(X, method):
    """Inverse of `preprocess`; the same methods are supported."""
    assert method in {'raw', 'imagenet', 'inception', 'mnist'}

    if method == 'raw':
        pass
    elif method == 'imagenet':
        X = imagenet_reverse_preprocessing(X)
    else:
        raise Exception('unknown method %s' % method)

    return X


def imagenet_preprocessing(x, data_format=None):
    """Convert RGB to BGR and subtract the per-channel mean pixel.

    Works on a copy of `x`. `data_format` defaults to
    `K.image_data_format()`; 3-D (single image) and batched inputs are both
    handled for 'channels_first'.
    """
    if data_format is None:
        data_format = K.image_data_format()
    assert data_format in ('channels_last', 'channels_first')

    x = np.array(x)
    if data_format == 'channels_first':
        # 'RGB'->'BGR'
        if x.ndim == 3:
            x = x[::-1, ...]
        else:
            x = x[:, ::-1, ...]
    else:
        # 'RGB'->'BGR'
        x = x[..., ::-1]

    mean = [103.939, 116.779, 123.68]
    std = None  # no scaling is applied; the std branches below are inactive

    # Zero-center by mean pixel
    if data_format == 'channels_first':
        if x.ndim == 3:
            x[0, :, :] -= mean[0]
            x[1, :, :] -= mean[1]
            x[2, :, :] -= mean[2]
            if std is not None:
                x[0, :, :] /= std[0]
                x[1, :, :] /= std[1]
                x[2, :, :] /= std[2]
        else:
            x[:, 0, :, :] -= mean[0]
            x[:, 1, :, :] -= mean[1]
            x[:, 2, :, :] -= mean[2]
            if std is not None:
                x[:, 0, :, :] /= std[0]
                x[:, 1, :, :] /= std[1]
                x[:, 2, :, :] /= std[2]
    else:
        x[..., 0] -= mean[0]
        x[..., 1] -= mean[1]
        x[..., 2] -= mean[2]
        if std is not None:
            x[..., 0] /= std[0]
            x[..., 1] /= std[1]
            x[..., 2] /= std[2]

    return x


def imagenet_reverse_preprocessing(x, data_format=None):
    """Undo `imagenet_preprocessing`: add the mean pixel, then BGR to RGB.

    Works on a copy of `x`.
    """
    x = np.array(x)
    if data_format is None:
        data_format = K.image_data_format()
    assert data_format in ('channels_last', 'channels_first')

    if data_format == 'channels_first':
        if x.ndim == 3:
            # Zero-center by mean pixel
            x[0, :, :] += 103.939
            x[1, :, :] += 116.779
            x[2, :, :] += 123.68
            # 'BGR'->'RGB'
            x = x[::-1, :, :]
        else:
            x[:, 0, :, :] += 103.939
            x[:, 1, :, :] += 116.779
            x[:, 2, :, :] += 123.68
            x = x[:, ::-1, :, :]
    else:
        # Zero-center by mean pixel
        x[..., 0] += 103.939
        x[..., 1] += 116.779
        x[..., 2] += 123.68
        # 'BGR'->'RGB'
        x = x[..., ::-1]
    return x


def reverse_process_cloaked(x, preprocess='imagenet'):
    """Map cloaked images back to raw pixel space (no clipping is applied)."""
    return reverse_preprocess(x, preprocess)


def build_bottleneck_model(model, cut_off):
    """Return a compiled model mapping `model.input` to layer `cut_off`'s output."""
    bottleneck_model = Model(model.input, model.get_layer(cut_off).output)
    bottleneck_model.compile(loss='categorical_crossentropy',
                             optimizer='adam',
                             metrics=['accuracy'])
    return bottleneck_model


def load_extractor(name):
    """Load feature extractor `name`, downloading it to ~/.fawkes if missing.

    The matching `<name>_emb.p.gz` embedding file is downloaded as well when
    it is not present yet.

    Returns:
        The keras model loaded from `~/.fawkes/<name>.h5`.
    """
    model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
    _makedirs_exist_ok(model_dir)
    model_file = os.path.join(model_dir, "{}.h5".format(name))
    emb_file = os.path.join(model_dir, "{}_emb.p.gz".format(name))

    if not os.path.exists(model_file):
        print("Download models...")
        get_file("{}.h5".format(name), "http://sandlab.cs.uchicago.edu/fawkes/files/{}.h5".format(name),
                 cache_dir=model_dir, cache_subdir='')
    model = keras.models.load_model(model_file)

    if not os.path.exists(emb_file):
        get_file("{}_emb.p.gz".format(name), "http://sandlab.cs.uchicago.edu/fawkes/files/{}_emb.p.gz".format(name),
                 cache_dir=model_dir, cache_subdir='')

    # NOTE(review): `activation` is compared to the string "softmax"; for
    # keras layers this attribute is normally a callable, in which case the
    # guard can never fire - confirm and compare against the function/name.
    if hasattr(model.layers[-1], "activation") and model.layers[-1].activation == "softmax":
        raise Exception(
            "Given extractor's last layer is softmax, need to remove the top layers to make it into a feature extractor")
    return model


def get_dataset_path(dataset):
    """Look up `dataset` in ~/.fawkes/config.json.

    Returns:
        `(train_dir, test_dir, num_classes, num_images)` of the dataset.

    Raises:
        Exception: when the config file or the dataset entry is missing.
    """
    model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
    config_path = os.path.join(model_dir, "config.json")
    if not os.path.exists(config_path):
        raise Exception("Please config the datasets before running protection code. See more in README and config.py.")

    with open(config_path, 'r') as config_file:
        config = json.load(config_file)
    if dataset not in config:
        raise Exception(
            "Dataset {} does not exist, please download to data/ and add the path to this function... Abort".format(
                dataset))
    return config[dataset]['train_dir'], config[dataset]['test_dir'], config[dataset]['num_classes'], config[dataset][
        'num_images']


def dump_image(x, filename, format="png", scale=False):
    """Save array `x` as an image file at `filename` in the given `format`."""
    img = image.array_to_img(x, scale=scale)
    img.save(filename, format)
    return


def load_embeddings(feature_extractors_names):
    """Load the pickled embedding table(s) stored in ~/.fawkes.

    NOTE(review): every file is loaded, but only the table of the LAST
    extractor name is returned - confirm that merging is not intended.

    Raises:
        ValueError: when `feature_extractors_names` is empty.
    """
    if not feature_extractors_names:
        raise ValueError("feature_extractors_names must not be empty")

    model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
    for extractor_name in feature_extractors_names:
        # SECURITY: pickle executes arbitrary code on load; these files are
        # downloaded by load_extractor, so they must come from a trusted host.
        with gzip.open(os.path.join(model_dir, "{}_emb.p.gz".format(extractor_name)), 'rb') as fp:
            path2emb = pickle.load(fp)
    return path2emb


def extractor_ls_predict(feature_extractors_ls, X):
    """Run every extractor on `X` and concatenate the features along axis 1."""
    feature_ls = [extractor.predict(X) for extractor in feature_extractors_ls]
    concated_feature_ls = np.concatenate(feature_ls, axis=1)
    return concated_feature_ls


def pairwise_l2_distance(A, B):
    """Return the matrix of Euclidean distances between rows of `A` and `B`.

    Uses the expansion |a|^2 + |b|^2 - 2ab; negative values caused by
    rounding are set to 0 before the square root. The result is an
    `np.matrix` with one row per row of `A` and one column per row of `B`.
    """
    BT = B.transpose()
    vecProd = np.dot(A, BT)
    SqA = A ** 2
    sumSqA = np.matrix(np.sum(SqA, axis=1))
    sumSqAEx = np.tile(sumSqA.transpose(), (1, vecProd.shape[1]))

    SqB = B ** 2
    sumSqB = np.sum(SqB, axis=1)
    sumSqBEx = np.tile(sumSqB, (vecProd.shape[0], 1))
    SqED = sumSqBEx + sumSqAEx - 2 * vecProd
    SqED[SqED < 0] = 0.0
    ED = np.sqrt(SqED)
    return ED


def select_target_label(imgs, feature_extractors_ls, feature_extractors_names, metric='l2'):
    """Pick a target identity far from `imgs` and return its images.

    Each candidate is scored by its minimum L2 distance to any input image;
    the target is drawn at random from the 20 highest-scoring candidates.
    Up to 10 images of the target are downloaded to
    `~/.fawkes/target_data/<id>/` if they are not cached yet.

    Args:
        imgs: pre-processed input images fed to the extractors.
        feature_extractors_ls: models used to embed `imgs`.
        feature_extractors_names: names used to load the candidate embeddings.
        metric: currently unused.

    Returns:
        An array of `len(imgs)` randomly sampled, 'imagenet' pre-processed
        224x224 target images.

    Raises:
        Exception: when no image of the target is available locally after
            the download attempts.
    """
    model_dir = os.path.join(os.path.expanduser('~'), '.fawkes')

    original_feature_x = extractor_ls_predict(feature_extractors_ls, imgs)

    path2emb = load_embeddings(feature_extractors_names)
    items = list(path2emb.items())
    paths = [p[0] for p in items]
    embs = np.array([p[1] for p in items])

    pair_dist = pairwise_l2_distance(original_feature_x, embs)
    pair_dist = np.array(pair_dist)

    # Score of a candidate = distance to the closest input image.
    min_dist = np.min(pair_dist, axis=0)
    max_id_ls = np.argsort(min_dist)[::-1]

    max_id = random.choice(max_id_ls[:20])

    target_data_id = paths[int(max_id)]
    print("target ID: {}".format(target_data_id))

    image_dir = os.path.join(model_dir, "target_data/{}".format(target_data_id))
    _makedirs_exist_ok(image_dir)

    for i in range(10):
        if os.path.exists(os.path.join(model_dir, "target_data/{}/{}.jpg".format(target_data_id, i))):
            continue
        try:
            get_file("{}.jpg".format(i),
                     "http://sandlab.cs.uchicago.edu/fawkes/files/target_data/{}/{}.jpg".format(target_data_id, i),
                     cache_dir=model_dir, cache_subdir='target_data/{}/'.format(target_data_id))
        except Exception as e:
            # Best effort: a missing image is tolerated, but say so instead
            # of hiding the failure.
            print("Failed to download target image {}.jpg: {}".format(i, e))

    image_paths = glob.glob(image_dir + "/*.jpg")
    if not image_paths:
        raise Exception("No target images available for {}".format(target_data_id))

    target_images = [image.img_to_array(image.load_img(cur_path)) for cur_path in
                     image_paths]

    target_images = np.array([resize(x, (224, 224)) for x in target_images])
    target_images = preprocess(target_images, 'imagenet')

    target_images = list(target_images)
    # Duplicate the pool until there are enough images to sample from.
    while len(target_images) < len(imgs):
        target_images += target_images

    target_images = random.sample(target_images, len(imgs))
    return np.array(target_images)


def get_file(fname, origin, untar=False,
             md5_hash=None, file_hash=None,
             cache_subdir='datasets',
             hash_algorithm='auto',
             extract=False,
             archive_format='auto',
             cache_dir=None):
    """Download `origin` into the cache unless the file already exists.

    Args:
        fname: file name inside the cache directory.
        origin: URL to download from.
        untar: when true, the download is stored as `<fname>.tar.gz` and
            extracted; the path without the suffix is returned.
        md5_hash, file_hash, hash_algorithm: accepted for API compatibility;
            no hash verification is performed by this implementation.
        cache_subdir: sub-directory of the cache directory to use.
        extract: when true, try to extract the file with `archive_format`.
        archive_format: forwarded to `_extract_archive`.
        cache_dir: cache root, defaults to `~/.fawkes`; when it is not
            writable `/tmp/.fawkes` is used instead.

    Returns:
        Path of the downloaded (or, with `untar`, extracted) file.

    Raises:
        Exception: on HTTP/URL errors; a partial download is removed first.
    """
    if cache_dir is None:
        cache_dir = os.path.join(os.path.expanduser('~'), '.fawkes')
    if md5_hash is not None and file_hash is None:
        file_hash = md5_hash
        hash_algorithm = 'md5'
    datadir_base = os.path.expanduser(cache_dir)
    if not os.access(datadir_base, os.W_OK):
        datadir_base = os.path.join('/tmp', '.fawkes')
    datadir = os.path.join(datadir_base, cache_subdir)
    _makedirs_exist_ok(datadir)

    if untar:
        untar_fpath = os.path.join(datadir, fname)
        fpath = untar_fpath + '.tar.gz'
    else:
        fpath = os.path.join(datadir, fname)

    download = False
    if not os.path.exists(fpath):
        download = True

    if download:
        error_msg = 'URL fetch failure on {}: {} -- {}'
        dl_progress = None
        try:
            try:
                urlretrieve(origin, fpath, dl_progress)
            except HTTPError as e:
                raise Exception(error_msg.format(origin, e.code, e.msg))
            except URLError as e:
                raise Exception(error_msg.format(origin, e.errno, e.reason))
        except (Exception, KeyboardInterrupt):
            # Never leave a truncated file behind: it would be mistaken for
            # a finished download on the next call.
            if os.path.exists(fpath):
                os.remove(fpath)
            raise

    if untar:
        if not os.path.exists(untar_fpath):
            _extract_archive(fpath, datadir, archive_format='tar')
        return untar_fpath

    if extract:
        _extract_archive(fpath, datadir, archive_format)

    return fpath


def _extract_archive(file_path, path='.', archive_format='auto'):
    """Extract `file_path` into `path` if it matches one of the formats.

    Args:
        file_path: archive to extract.
        path: destination directory.
        archive_format: 'auto' (try tar, then zip), 'tar', 'zip', a list of
            these, or None to do nothing. Unknown format names are skipped.

    Returns:
        True when an archive was extracted, False otherwise.

    Raises:
        tarfile.TarError, RuntimeError, KeyboardInterrupt: re-raised after
            `path` has been removed.
    """
    if archive_format is None:
        return False
    if archive_format == 'auto':
        archive_format = ['tar', 'zip']
    if isinstance(archive_format, six.string_types):
        archive_format = [archive_format]

    for archive_type in archive_format:
        if archive_type == 'tar':
            open_fn = tarfile.open
            is_match_fn = tarfile.is_tarfile
        elif archive_type == 'zip':
            open_fn = zipfile.ZipFile
            is_match_fn = zipfile.is_zipfile
        else:
            # Unknown format: skip it rather than fail on unbound helpers
            # or silently reuse those of the previous iteration.
            continue

        if is_match_fn(file_path):
            with open_fn(file_path) as archive:
                try:
                    archive.extractall(path)
                except (tarfile.TarError, RuntimeError,
                        KeyboardInterrupt):
                    if os.path.exists(path):
                        if os.path.isfile(path):
                            os.remove(path)
                        else:
                            shutil.rmtree(path)
                    raise
            return True
    return False


def _makedirs_exist_ok(datadir):
    """Create `datadir` (and parents) without failing when it already exists."""
    if six.PY2:
        # Python 2 doesn't have the exist_ok arg, so we try-except here.
        try:
            os.makedirs(datadir)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
    else:
        os.makedirs(datadir, exist_ok=True)  # pylint: disable=unexpected-keyword-arg