@comment{Entry 3021171: the numeric citation key looks like a database export record id (TODO confirm); it is kept unchanged so existing citations keep resolving. The pages field holds the article number taken from the DOI suffix.}
@article{3021171,
  author    = {Vougas, K. and Sakellaropoulos, T. and Kotsinas, A. and
               Foukas, G.-R. P. and Ntargaras, A. and Koinis, F. and
               Polyzos, A. and Myrianthopoulos, V. and Zhou, H. and
               Narang, S. and Georgoulias, V. and Alexopoulos, L. and
               Aifantis, I. and Townsend, P. A. and Sfikakis, P. and
               Fitzgerald, R. and Thanos, D. and Bartek, J. and
               Petty, R. and Tsirigos, A. and Gorgoulis, V. G.},
  title     = {Machine learning and data mining frameworks for predicting drug response in cancer: An overview and a novel in silico screening process based on association rule mining},
  journal   = {Pharmacology and Therapeutics},
  year      = {2019},
  volume    = {203},
  pages     = {107395},
  publisher = {Elsevier Science Inc.},
  address   = {New York, NY, USA},
  issn      = {0163-7258},
  doi       = {10.1016/j.pharmthera.2019.107395},
  keywords  = {antineoplastic agent, algorithm, artificial neural network,
               association rule mining, comparative study, computer model,
               computer prediction, conceptual framework, data mining,
               decision tree, deep learning, drug response, drug screening,
               human, machine learning, malignant neoplasm,
               personalized medicine, priority journal, random forest,
               Review, supervised machine learning, transfer of learning,
               unsupervised machine learning, animal, computer simulation,
               neoplasm, treatment outcome, Animals, Computer Simulation,
               Data Mining, Humans, Machine Learning, Neoplasms,
               Treatment Outcome},
  abstract  = {A major challenge in cancer treatment is predicting the
               clinical response to anti-cancer drugs on a personalized
               basis. The success of such a task largely depends on the
               ability to develop computational resources that integrate
               big ``omic'' data into effective drug-response models.
               Machine learning is both an expanding and an evolving
               computational field that holds promise to cover such needs.
               Here we provide a focused overview of: 1) the various
               supervised and unsupervised algorithms used specifically in
               drug response prediction applications, 2) the strategies
               employed to develop these algorithms into applicable models,
               3) data resources that are fed into these frameworks and
               4) pitfalls and challenges to maximize model performance.
               In this context we also describe a novel in silico screening
               process, based on Association Rule Mining, for identifying
               genes as candidate drivers of drug response and compare it
               with relevant data mining frameworks, for which we generated
               a web application freely available at:
               https://compbio.nyumc.org/drugs/. This pipeline explores
               with high efficiency large sample-spaces, while is able to
               detect low frequency events and evaluate statistical
               significance even in the multidimensional space, presenting
               the results in the form of easily interpretable rules. We
               conclude with future prospects and challenges of applying
               machine learning based drug response prediction in precision
               medicine.},
  copyright = {{\copyright} 2019 Elsevier Inc.},
}