% Kubik et al. (2021), Clinical Microbiology and Infection 27(7):
% recommendations for amplicon-based SARS-CoV-2 genotyping of clinical samples.
% Names are stored in "Last, First" form, one author per line, joined by " and ".
% NOTE(review): no page range / article number is recorded for this entry -- add `pages` once confirmed against the DOI.
@article{3029958,
  author    = {Kubik, Slawomir
               and Marques, Ana Claudia
               and Xing, Xiaobin
               and Silvery, Janine
               and Bertelli, Claire
               and De Maio, Flavio
               and Pournaras, Spyros
               and Burr, Tom
               and Duffourd, Yannis
               and Siemens, Helena
               and Alloui, Chakib
               and Song, Lin
               and Wenger, Yvan
               and Saitta, Alexandra
               and Macheret, Morgane
               and Smith, Ewan W.
               and Menu, Philippe
               and Brayer, Marion
               and Steinmetz, Lars M.
               and Si-Mohammed, Ali
               and Chuisseu, Josiane
               and Stevens, Richard
               and Constantoulakis, Pantelis
               and Sali, Michela
               and Greub, Gilbert
               and Tiemann, Carsten
               and Pelechano, Vicent
               and Willig, Adrian
               and Xu, Zhenyu},
  title     = {Recommendations for accurate genotyping of {SARS-CoV-2} using amplicon-based sequencing of clinical samples},
  journal   = {Clinical Microbiology and Infection},
  year      = {2021},
  volume    = {27},
  number    = {7},
  publisher = {Elsevier Sci Ltd},
  address   = {Exeter, United Kingdom},
  issn      = {1198-743X},
  doi       = {10.1016/j.cmi.2021.03.029},
  keywords  = {Amplicon; Coronavirus; Genome; Genotyping; Guidelines; Next-generation sequencing; NGS; Recommendations; SARS-CoV-2},
  abstract  = {Objectives: Genotyping of severe acute respiratory syndrome coronavirus 2 (SARS-CoV-2) has been instrumental in monitoring viral evolution and transmission during the pandemic. The quality of the sequence data obtained from these genotyping efforts depends on several factors, including the quantity/integrity of the input material, the technology, and laboratory-specific implementation. The current lack of guidelines for SARS-CoV-2 genotyping leads to inclusion of error-containing genome sequences in genomic epidemiology studies. We aimed to establish clear and broadly applicable recommendations for reliable virus genotyping.
               Methods: We established and used a sequencing data analysis workflow that reliably identifies and removes technical artefacts; such artefacts can result in miscalls when using alternative pipelines to process clinical samples and synthetic viral genomes with an amplicon-based genotyping approach. We evaluated the impact of experimental factors, including viral load and sequencing depth, on correct sequence determination.
               Results: We found that at least 1000 viral genomes are necessary to confidently detect variants in the SARS-CoV-2 genome at frequencies of >= 10\%. The broad applicability of our recommendations was validated in over 200 clinical samples from six independent laboratories. The genotypes we determined for clinical isolates with sufficient quality cluster by sampling location and period. Our analysis also supports the rise in frequencies of 20A.EU1 and 20A.EU2, two recently reported European strains whose dissemination was facilitated by travel during the summer of 2020.
               Conclusions: We present much-needed recommendations for the reliable determination of SARS-CoV-2 genome sequences and demonstrate their broad applicability in a large cohort of clinical samples. (C) 2021 The Author(s). Published by Elsevier Ltd on behalf of European Society of Clinical Microbiology and Infectious Diseases.},
}