We have collaborated on the following publications:
2023
Sládeček, T.; Gažiová, M.; Kucharík, M.; Zaťková, A.; Pös, Z.; Pös, O.; Krampl, W.; Tomková, E.; Hýblová, M.; Minárik, G.; Radvánszky, J.; Budiš, J.; Szemes, T.
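Combination of expert guidelines-based and machine learning-based approaches leads to superior accuracy of automated prediction of clinical effect of copy number variations Journal Article
In: Scientific Reports, 13 (1), 2023, ISSN: 20452322.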
Abstract | Links | BibTeX | Tags: Computational method, Copy number variation
@article{Sládeček2023,
  author    = {T. Sládeček and M. Gažiová and M. Kucharík and A. Zaťková and Z. Pös and O. Pös and W. Krampl and E. Tomková and M. Hýblová and G. Minárik and J. Radvánszky and J. Budiš and T. Szemes},
  title     = {Combination of expert guidelines-based and machine learning-based approaches leads to superior accuracy of automated prediction of clinical effect of copy number variations},
  journal   = {Scientific Reports},
  volume    = {13},
  number    = {1},
  year      = {2023},
  date      = {2023-01-01},
  urldate   = {2023-01-01},
  publisher = {Nature Research},
  issn      = {20452322},
  doi       = {10.1038/s41598-023-37352-1},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85163886010&doi=10.1038%2fs41598-023-37352-1&partnerID=40&md5=9752e9d83f2d1eca8a028bdffc3fd1f8},
  abstract  = {Clinical interpretation of copy number variants (CNVs) is a complex process that requires skilled clinical professionals. General recommendations have been recently released to guide the CNV interpretation based on predefined criteria to uniform the decision process. Several semiautomatic computational methods have been proposed to recommend appropriate choices, relieving clinicians of tedious searching in vast genomic databases. We have developed and evaluated such a tool called MarCNV and tested it on CNV records collected from the ClinVar database. Alternatively, the emerging machine learning-based tools, such as the recently published ISV (Interpretation of Structural Variants), showed promising ways of even fully automated predictions using broader characterization of affected genomic elements. Such tools utilize features additional to ACMG criteria, thus providing supporting evidence and the potential to improve CNV classification. Since both approaches contribute to evaluation of CNVs clinical impact, we propose a combined solution in the form of a decision support tool based on automated ACMG guidelines (MarCNV) supplemented by a machine learning-based pathogenicity prediction (ISV) for the classification of CNVs. We provide evidence that such a combined approach is able to reduce the number of uncertain classifications and reveal potentially incorrect classifications using automated guidelines. CNV interpretation using MarCNV, ISV, and combined approach is available for non-commercial use at https://predict.genovisio.com/ . © 2023, The Author(s).},
  keywords  = {Computational method, Copy number variation},
  pubstate  = {published},
  tppubtype = {article}
}
Baláž, A.; Kajsik, M.; Budiš, J.; Szemes, T.; Turňa, J.
PHERI—Phage Host ExploRation Pipeline Journal Article
In: Microorganisms, 11 (6), 2023, ISSN: 20762607.
Abstract | Links | BibTeX | Tags: Bacteriophages, Computational method
@article{Baláž2023,
  author    = {A. Baláž and M. Kajsik and J. Budiš and T. Szemes and J. Turňa},
  title     = {{PHERI}—Phage Host {ExploRation} Pipeline},
  journal   = {Microorganisms},
  volume    = {11},
  number    = {6},
  year      = {2023},
  date      = {2023-01-01},
  urldate   = {2023-01-01},
  publisher = {MDPI},
  issn      = {20762607},
  doi       = {10.3390/microorganisms11061398},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85164160201&doi=10.3390%2fmicroorganisms11061398&partnerID=40&md5=0a09643955666b6b600356ced3043434},
  abstract  = {Antibiotic resistance is becoming a common problem in medicine, food, and industry, with multidrug-resistant bacterial strains occurring in all regions. One of the possible future solutions is the use of bacteriophages. Phages are the most abundant form of life in the biosphere, so we can highly likely purify a specific phage against each target bacterium. The identification and consistent characterization of individual phages was a common form of phage work and included determining bacteriophages’ host-specificity. With the advent of new modern sequencing methods, there was a problem with the detailed characterization of phages in the environment identified by metagenome analysis. The solution to this problem may be to use a bioinformatic approach in the form of prediction software capable of determining a bacterial host based on the phage whole-genome sequence. The result of our research is the machine learning algorithm-based tool called PHERI. PHERI predicts the suitable bacterial host genus for the purification of individual viruses from different samples. In addition, it can identify and highlight protein sequences that are important for host selection. © 2023 by the authors.},
  keywords  = {Bacteriophages, Computational method},
  pubstate  = {published},
  tppubtype = {article}
}
2022
Gažiová, M.; Sládeček, T.; Pös, O.; Števko, M.; Krampl, W.; Pös, Z.; Hekel, R.; Hlavačka, M.; Kucharík, M.; Radvánszky, J.; Budiš, J.; Szemes, T.
Automated prediction of the clinical impact of structural copy number variations Journal Article
In: Scientific Reports, 12 (1), 2022, ISSN: 20452322.
Abstract | Links | BibTeX | Tags: Computational method, Copy number variation, Variant interpretation
@article{Gažiová2022,
  author    = {M. Gažiová and T. Sládeček and O. Pös and M. Števko and W. Krampl and Z. Pös and R. Hekel and M. Hlavačka and M. Kucharík and J. Radvánszky and J. Budiš and T. Szemes},
  title     = {Automated prediction of the clinical impact of structural copy number variations},
  journal   = {Scientific Reports},
  volume    = {12},
  number    = {1},
  year      = {2022},
  date      = {2022-01-01},
  urldate   = {2022-01-01},
  publisher = {Nature Research},
  issn      = {20452322},
  doi       = {10.1038/s41598-021-04505-z},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85122796228&doi=10.1038%2fs41598-021-04505-z&partnerID=40&md5=2826a9187c8d22af2fdc6f5d22911cf2},
  abstract  = {Copy number variants (CNVs) play an important role in many biological processes, including the development of genetic diseases, making them attractive targets for genetic analyses. The interpretation of the effect of these structural variants is a challenging problem due to highly variable numbers of gene, regulatory, or other genomic elements affected by the CNV. This led to the demand for the interpretation tools that would relieve researchers, laboratory diagnosticians, genetic counselors, and clinical geneticists from the laborious process of annotation and classification of CNVs. We designed and validated a prediction method (ISV; Interpretation of Structural Variants) that is based on boosted trees which takes into account annotations of CNVs from several publicly available databases. The presented approach achieved more than 98\% prediction accuracy on both copy number loss and copy number gain variants while also allowing CNVs being assigned “uncertain” significance in predictions. We believe that ISV’s prediction capability and explainability have a great potential to guide users to more precise interpretations and classifications of CNVs. © 2022, The Author(s).},
  keywords  = {Computational method, Copy number variation, Variant interpretation},
  pubstate  = {published},
  tppubtype = {article}
}
2021
Kucharík, M; Budiš, J; Hýblová, M; Minárik, G; Szemes, T
Copy number variant detection with low-coverage whole-genome sequencing represents a viable alternative to the conventional array-CGH Journal Article
In: Diagnostics, 11 (4), 2021, ISSN: 20754418.
Abstract | Links | BibTeX | Tags: Cell-free nucleic acids, Computational method, Copy number variation, Liquid biopsy, Non-invasive prenatal testing
@article{Kucharík2021,
  author    = {M Kucharík and J Budiš and M Hýblová and G Minárik and T Szemes},
  title     = {Copy number variant detection with low-coverage whole-genome sequencing represents a viable alternative to the conventional array-{CGH}},
  journal   = {Diagnostics},
  volume    = {11},
  number    = {4},
  year      = {2021},
  date      = {2021-01-01},
  urldate   = {2021-01-01},
  publisher = {MDPI AG},
  issn      = {20754418},
  doi       = {10.3390/diagnostics11040708},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85109087040&doi=10.3390%2fdiagnostics11040708&partnerID=40&md5=6fdfa35027032bf889399d967bb1cce9},
  abstract  = {Copy number variations (CNVs) represent a type of structural variant involving alterations in the number of copies of specific regions of DNA that can either be deleted or duplicated. CNVs contribute substantially to normal population variability, however, abnormal CNVs cause numerous genetic disorders. At present, several methods for CNV detection are applied, ranging from the conventional cytogenetic analysis, through microarray-based methods (aCGH), to next-generation sequencing (NGS). In this paper, we present GenomeScreen, an NGS-based CNV detection method for low-coverage, whole-genome sequencing. We determined the theoretical limits of its accuracy and obtained confirmation in an extensive in silico study and in real patient samples with known genotypes. In theory, at least 6 M uniquely mapped reads are required to detect a CNV with the length of 100 kilobases (kb) or more with high confidence (Z-score > 7). In practice, the in silico analysis required at least 8 M to obtain >99\% accuracy (for 100 kb deviations). We compared GenomeScreen with one of the currently used aCGH methods in diagnostic laboratories, which has mean resolution of 200 kb. GenomeScreen and aCGH both detected 59 deviations, while GenomeScreen furthermore detected 134 other (usually) smaller variations. When compared to aCGH, overall performance of the proposed GenemoScreen tool is comparable or superior in terms of accuracy, turn-around time, and cost-effectiveness, thus providing reasonable benefits, particularly in a prenatal diagnosis setting. © 2021 by the authors. Licensee MDPI, Basel, Switzerland.},
  keywords  = {Cell-free nucleic acids, Computational method, Copy number variation, Liquid biopsy, Non-invasive prenatal testing},
  pubstate  = {published},
  tppubtype = {article}
}
2019
Gazdarica, J; Hekel, R; Budis, J; Kucharik, M; Duris, F; Radvanszky, J; Turna, J; Szemes, T
Combination of fetal fraction estimators based on fragment lengths and fragment counts in non-invasive prenatal testing Journal Article
In: International Journal of Molecular Sciences, 20 (16), 2019, ISSN: 16616596.
Abstract | Links | BibTeX | Tags: Aneuploidy, Computational method, Fetal fraction, Non-invasive prenatal testing
@article{Gazdarica2019,
  author    = {J Gazdarica and R Hekel and J Budis and M Kucharik and F Duris and J Radvanszky and J Turna and T Szemes},
  title     = {Combination of fetal fraction estimators based on fragment lengths and fragment counts in non-invasive prenatal testing},
  journal   = {International Journal of Molecular Sciences},
  volume    = {20},
  number    = {16},
  year      = {2019},
  date      = {2019-01-01},
  publisher = {MDPI AG},
  issn      = {16616596},
  doi       = {10.3390/ijms20163959},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85071515046&doi=10.3390%2fijms20163959&partnerID=40&md5=13c0a797eeb085dba097cea3d39680a9},
  abstract  = {The reliability of non-invasive prenatal testing is highly dependent on accurate estimation of fetal fraction. Several methods have been proposed up to date, utilizing different attributes of analyzed genomic material, for example length and genomic location of sequenced DNA fragments. These two sources of information are relatively unrelated, but so far, there have been no published attempts to combine them to get an improved predictor. We collected 2454 single euploid male fetus samples from women undergoing NIPT testing. Fetal fractions were calculated using several proposed predictors and the state-of-the-art SeqFF method. Predictions were compared with the reference Y-based method. We demonstrate that prediction based on length of sequenced DNA fragments may achieve nearly the same precision as the state-of-the-art methods based on their genomic locations. We also show that combination of several sample attributes leads to a predictor that has superior prediction accuracy over any single approach. Finally, appropriate weighting of samples in the training process may achieve higher accuracy for samples with low fetal fraction and so allow more reliability for subsequent testing for genomic aberrations. We propose several improvements in fetal fraction estimation with a special focus on the samples most prone to wrong conclusion. © 2019 by the authors. Licensee MDPI, Basel, Switzerland.},
  keywords  = {Aneuploidy, Computational method, Fetal fraction, Non-invasive prenatal testing},
  pubstate  = {published},
  tppubtype = {article}
}
Gazdarica, J; Budis, J; Duris, F; Turna, J; Szemes, T
Adaptable model parameters in non-invasive prenatal testing lead to more stable predictions Journal Article
In: International Journal of Molecular Sciences, 20 (14), 2019, ISSN: 16616596.
Abstract | Links | BibTeX | Tags: Aneuploidy, Computational method, Non-invasive prenatal testing
@article{Gazdarica2019b,
  author    = {J Gazdarica and J Budis and F Duris and J Turna and T Szemes},
  title     = {Adaptable model parameters in non-invasive prenatal testing lead to more stable predictions},
  journal   = {International Journal of Molecular Sciences},
  volume    = {20},
  number    = {14},
  year      = {2019},
  date      = {2019-01-01},
  publisher = {MDPI AG},
  issn      = {16616596},
  doi       = {10.3390/ijms20143414},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85070461646&doi=10.3390%2fijms20143414&partnerID=40&md5=055222c81da89a9a0300464ee09b9c1b},
  abstract  = {Recent advances in massively parallel shotgun sequencing opened up new options for affordable non-invasive prenatal testing (NIPT) for fetus aneuploidy from DNA material extracted from maternal plasma. Tests typically compare chromosomal distributions of a tested sample with a control set of healthy samples with unaffected fetuses. Deviations above certain threshold levels are concluded as positive findings. The main problem with this approach is that the variance of the control set is dependent on the number of sequenced fragments. The higher the amount, the more precise the estimation of actual chromosomal proportions is. Testing a sample with a highly different number of sequenced reads as used in training may thus lead to over- or under-estimation of their variance, and so lead to false predictions. We propose the calculation of a variance for each tested sample adaptively, based on the actual number of its sequenced fragments. We demonstrate how it leads to more stable predictions, mainly in real-world diagnostics with the highly divergent inter-sample coverage. © 2019 by the authors. Licensee MDPI, Basel, Switzerland.},
  keywords  = {Aneuploidy, Computational method, Non-invasive prenatal testing},
  pubstate  = {published},
  tppubtype = {article}
}
Budiš, J; Kucharík, M; Duriš, F; Gazdarica, J; Zrubcová, M; Ficek, A; Szemes, T; Brejová, B; Radvanszky, J
Dante: Genotyping of known complex and expanded short tandem repeats Journal Article
In: Bioinformatics, 35 (8), pp. 1310-1317, 2019, ISSN: 13674803.
Abstract | Links | BibTeX | Tags: Computational method, Genetic testing, Short tandem repeats, Variant calling
@article{Budiš20191310,
  author    = {J Budiš and M Kucharík and F Duriš and J Gazdarica and M Zrubcová and A Ficek and T Szemes and B Brejová and J Radvanszky},
  title     = {{Dante}: Genotyping of known complex and expanded short tandem repeats},
  journal   = {Bioinformatics},
  volume    = {35},
  number    = {8},
  pages     = {1310--1317},
  year      = {2019},
  date      = {2019-01-01},
  publisher = {Oxford University Press},
  issn      = {13674803},
  doi       = {10.1093/bioinformatics/bty791},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85064435619&doi=10.1093%2fbioinformatics%2fbty791&partnerID=40&md5=7e873f64aff7726aeb724a3a0c37237f},
  abstract  = {Motivation: Short tandem repeats (STRs) are stretches of repetitive DNA in which short sequences, typically made of 2-6 nucleotides, are repeated several times. Since STRs have many important biological roles and also belong to the most polymorphic parts of the human genome, they became utilized in several molecular-genetic applications. Precise genotyping of STR alleles, therefore, was of high relevance during the last decades. Despite this, massively parallel sequencing (MPS) still lacks the analysis methods to fully utilize the information value of STRs in genome scale assays. Results: We propose an alignment-free algorithm, called Dante, for genotyping and characterization of STR alleles at user-specified known loci based on sequence reads originating from STR loci of interest. The method accounts for natural deviations from the expected sequence, such as variation in the repeat count, sequencing errors, ambiguous bases and complex loci containing several different motifs. In addition, we implemented a correction for copy number defects caused by the polymerase induced stutter effect as well as a prediction of STR expansions that, according to the conventional view, cannot be fully captured by inherently short MPS reads. We tested Dante on simulated datasets and on datasets obtained by targeted sequencing of protein coding parts of thousands of selected clinically relevant genes. In both these datasets, Dante outperformed HipSTR and GATK genotyping tools. Furthermore, Dante was able to predict allele expansions in all tested clinical cases. Availability and implementation: Dante is open source software, freely available for download at https://github.com/jbudis/dante. © The Author(s) 2018. Published by Oxford University Press. All rights reserved.},
  keywords  = {Computational method, Genetic testing, Short tandem repeats, Variant calling},
  pubstate  = {published},
  tppubtype = {article}
}
Budis, J; Gazdarica, J; Radvanszky, J; Szucs, G; Kucharik, M; Strieskova, L; Gazdaricova, I; Harsanyova, M; Duris, F; Minarik, G; Sekelska, M; Nagy, B; Turna, J; Szemes, T
Combining count- and length-based z-scores leads to improved predictions in non-invasive prenatal testing Journal Article
In: Bioinformatics, 35 (8), pp. 1284-1291, 2019, ISSN: 13674803.
Abstract | Links | BibTeX | Tags: Aneuploidy, Computational method, Fetal fraction, Non-invasive prenatal testing, Prenatal diagnosis
@article{Budis20191284,
  author    = {J Budis and J Gazdarica and J Radvanszky and G Szucs and M Kucharik and L Strieskova and I Gazdaricova and M Harsanyova and F Duris and G Minarik and M Sekelska and B Nagy and J Turna and T Szemes},
  title     = {Combining count- and length-based z-scores leads to improved predictions in non-invasive prenatal testing},
  journal   = {Bioinformatics},
  volume    = {35},
  number    = {8},
  pages     = {1284--1291},
  year      = {2019},
  date      = {2019-01-01},
  publisher = {Oxford University Press},
  issn      = {13674803},
  doi       = {10.1093/bioinformatics/bty806},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85067353429&doi=10.1093%2fbioinformatics%2fbty806&partnerID=40&md5=c63c304db3eb59cb922d0ca8e3a9e76a},
  abstract  = {Motivation: Non-invasive prenatal testing or NIPT is currently among the top researched topic in obstetric care. While the performance of the current state-of-the-art NIPT solutions achieve high sensitivity and specificity, they still struggle with a considerable number of samples that cannot be concluded with certainty. Such uninformative results are often subject to repeated blood sampling and re-analysis, usually after two weeks, and this period may cause a stress to the future mothers as well as increase the overall cost of the test. Results: We propose a supplementary method to traditional z-scores to reduce the number of such uninformative calls. The method is based on a novel analysis of the length profile of circulating cell free DNA which compares the change in such profiles when random-based and length-based elimination of some fragments is performed. The proposed method is not as accurate as the standard z-score; however, our results suggest that combination of these two independent methods correctly resolves a substantial portion of healthy samples with an uninformative result. Additionally, we discuss how the proposed method can be used to identify maternal aberrations, thus reducing the risk of false positive and false negative calls. Availability and implementation: The open-source code of the proposed methods, together with test data, is freely available for non-commercial users at github web page https://github.com/jbudis/lambda. © The Author(s) 2018. Published by Oxford University Press. All rights reserved.},
  keywords  = {Aneuploidy, Computational method, Fetal fraction, Non-invasive prenatal testing, Prenatal diagnosis},
  pubstate  = {published},
  tppubtype = {article}
}
2018
Duris, F; Gazdarica, J; Gazdaricova, I; Strieskova, L; Budis, J; Turna, J; Szemes, T
Mean and variance of ratios of proportions from categories of a multinomial distribution Journal Article
In: Journal of Statistical Distributions and Applications, 5 (1), 2018, ISSN: 21955832.
Abstract | Links | BibTeX | Tags: Aneuploidy, Computational method, Non-invasive prenatal testing
@article{Duris2018,
  author    = {F Duris and J Gazdarica and I Gazdaricova and L Strieskova and J Budis and J Turna and T Szemes},
  title     = {Mean and variance of ratios of proportions from categories of a multinomial distribution},
  journal   = {Journal of Statistical Distributions and Applications},
  volume    = {5},
  number    = {1},
  year      = {2018},
  date      = {2018-01-01},
  publisher = {Springer},
  issn      = {21955832},
  doi       = {10.1186/s40488-018-0083-x},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85062689021&doi=10.1186%2fs40488-018-0083-x&partnerID=40&md5=d293ac604976bfd7c1b99a1bb71fd1f2},
  abstract  = {Ratio distribution is a probability distribution representing the ratio of two random variables, each usually having a known distribution. Currently, there are results when the random variables in the ratio follow (not necessarily the same) Gaussian, Cauchy, binomial or uniform distributions. In this paper we consider a case, where the random variables in the ratio are joint binomial components of a multinomial distribution. We derived formulae for mean and variance of this ratio distribution using a simple Taylor-series approach and also a more complex approach which uses a slight modification of the original ratio. We showed that the more complex approach yields better results with simulated data. The presented results can be directly applied in the computation of confidence intervals for ratios of multinomial proportions. AMS Subject Classification: 62E20. © 2018, The Author(s).},
  keywords  = {Aneuploidy, Computational method, Non-invasive prenatal testing},
  pubstate  = {published},
  tppubtype = {article}
}