@inbook {4363,
title = {A Comparison of Two Different Types of Online Social Network from a Data Privacy Perspective},
booktitle = {Lecture Notes in Artificial Intelligence},
volume = {6820},
year = {2011},
pages = {223-234},
publisher = {Springer},
organization = {Springer},
abstract = {We consider two distinct types of online social network, the first made up of a log of writes to wall by users in Facebook, and the second consisting of a corpus of emails sent and received in a corporate environment (Enron). We calculate the statistics which describe the topologies of each network represented as a graph. Then we calculate the information loss and risk of disclosure for different percentages of perturbation for each dataset, where perturbation is achieved by randomly adding links to the nodes. We find that the general tendency of information loss is similar, although Facebook is affected to a greater extent. For risk of disclosure, both datasets also follow a similar trend, except for the average path length statistic. We find that the differences are due to the different distributions of the derived factors, and also the type of perturbation used and its parameterization. These results can be useful for choosing and tuning anonymization methods for different graph datasets.},
keywords = {Data Privacy, descriptive statistics, Information loss, risk of disclosure, Social network},
author = {David Nettleton and S{\'a}ez-Trumper, D. and Vicen{\c c} Torra}
}
@article {4364,
title = {Data privacy for simply anonymized network logs represented as graphs - considerations for graph alteration operations},
journal = {International Journal of Uncertainty, Fuzziness and Knowledge-Based Systems},
year = {2011},
abstract = {In this paper we review the state of the art on graph privacy with special emphasis on applications to online social networks, and we consider some novel aspects which have not been greatly covered in the specialized literature on graph privacy. The following key considerations are covered: (i) choice of different operators to modify the graph; (ii) information loss based on the cost of graph operations in terms of statistical characteristics (degree, clustering coefficient and path length) in the original graph; (iii) computational cost of the operations; (iv) in the case of the aggregation of two nodes, the choice of similar adjacent nodes rather than isomorphic topologies, in order to maintain the overall structure of the graph; (v) a statistically knowledgeable attacker who is able to search for regions of a simply anonymized graph based on statistical characteristics and map those onto a given node and its immediate neighborhood.},
keywords = {Data Privacy, graphs, heuristics., operators},
author = {David Nettleton and Vicen{\c c} Torra}
}