% TCR119186: journal article in Translational Cancer Research (2026).
% NOTE(review): the publisher export carried volume = {0}, number = {0} and an
% empty keywords field; these look like placeholders and were dropped so they
% do not print as "0(0)" -- fill in the real volume/issue/pages once known.
% NOTE(review): the abstract is cut off mid-sentence in the export (ends at
% "P"); restore the full text from the article page if the field is needed.
@article{TCR119186,
  author   = {Zhang, Changhong and Zheng, Yuxing and Fang, Chengyu and Huang, Yanhua and Liu, Junping},
  title    = {Development and validation of an interpretable model based on ultrasound radiomics for predicting {Ki-67} expression levels in breast cancer},
  journal  = {Translational Cancer Research},
  year     = {2026},
  issn     = {2219-6803},
  url      = {https://tcr.amegroups.org/article/view/119186},
  abstract = {Background: Ki-67 is a critical proliferation marker in breast cancer, but its preoperative assessment is limited by the invasiveness and sampling bias of core needle biopsy. This study aimed to establish and validate non-invasive prediction models of Ki-67 status of breast cancer based on conventional ultrasound radiomics features, clinical features, or their combination. Methods: Retrospective analysis was performed on 558 patients with breast cancer who underwent two-dimensional (2D) ultrasound and Ki-67 detection. Among them, 398 patients in the training set were from Zhejiang Cancer Hospital, and 160 patients in the external validation set were from Lishui Central Hospital. According to the 14% threshold, the patients were divided into Ki-67 low expression group and Ki-67 high expression group. Clinical parameters, conventional ultrasound characteristics, and 2D ultrasound images of the tumor’s maximum cross-section were collected. Radiomics features were extracted from the delineated regions of interest (ROIs) with the PyRadiomics package. We used univariate analysis, least absolute shrinkage and selection operator (LASSO) regression, and multivariate logistic regression to determine the independent predictors. Three models—clinical, radiomics, and a combined clinical-radiomics model—were developed. We constructed a nomogram based on the combined model. Model evaluation was undertaken via receiver operating characteristic (ROC) curve analysis [calculating area under the curve (AUC), accuracy, sensitivity, specificity, positive predictive value (PPV), negative predictive value (NPV), precision, recall, and F1 score], calibration curves, and decision curve analysis (DCA). In addition, SHapley Additive exPlanations (SHAP) were used to interpret the model. Results: In the training (n=398) and external validation (n=160) sets, multivariate logistic regression identified age [odds ratio (OR) =0.971, P=0.026], maximum lesion diameter (OR =1.051, P},
}