2023
Cepeda, Vicente Vivanco; Nayak, Gaurav Kumar; Shah, Mubarak
GeoCLIP: Clip-Inspired Alignment between Locations and Images for Effective Worldwide Geo-localization Conference
Thirty-seventh Conference on Neural Information Processing Systems, 2023.
Abstract | Tags: | Links:
@conference{Cepeda2023,
title = {GeoCLIP: Clip-Inspired Alignment between Locations and Images for Effective Worldwide Geo-localization},
author = {Vicente Vivanco Cepeda and Gaurav Kumar Nayak and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/GeoCLIP_camera_ready_paper.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/GeoCLIP_camera_ready_supplementary.pdf
https://vicentevivan.github.io/GeoCLIP/},
year = {2023},
date = {2023-12-11},
publisher = {Thirty-seventh Conference on Neural Information Processing Systems},
abstract = {Worldwide Geo-localization aims to pinpoint the precise location of images taken anywhere on Earth. This task has considerable challenges due to immense variation in geographic landscapes. The image-to-image retrieval-based approaches fail to solve this problem on a global scale as it is not feasible to construct a large gallery of images covering the entire world. Instead, existing approaches divide the globe into discrete geographic cells, transforming the problem into a classification task. However, their performance is limited by the predefined classes and often results in inaccurate localizations when an image’s location significantly deviates from its class center. To overcome these limitations, we propose GeoCLIP, a novel CLIP-inspired Image-to-GPS retrieval approach that enforces alignment between the image and its corresponding GPS locations. GeoCLIP’s location encoder models the Earth as a continuous function by employing positional encoding through random Fourier features and constructing a hierarchical representation that captures information at varying resolutions to yield a semantically rich high-dimensional feature suitable to use even beyond geo-localization. To the best of our knowledge, this is the first work employing GPS encoding for geo-localization. We demonstrate the efficacy of our method via extensive experiments and ablations on benchmark datasets. We achieve competitive performance with just 20% of training data, highlighting its effectiveness even in limited-data settings. Furthermore, we qualitatively demonstrate geo-localization using a text query by leveraging the CLIP backbone of our image encoder.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
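The abstract's central mechanism, encoding raw GPS coordinates with random Fourier features at several frequency scales before alignment with image features, can be illustrated with a minimal sketch. This is not the authors' released code; the module name, dimensions, and scale values are assumptions.

import torch
import torch.nn as nn

class RFFLocationEncoder(nn.Module):
    """Sketch of a random-Fourier-feature GPS encoder (names/sizes assumed)."""
    def __init__(self, embed_dim=512, num_freqs=256, scales=(1.0, 4.0, 16.0)):
        super().__init__()
        # One fixed random projection per resolution scale (hierarchical encoding).
        self.register_buffer(
            "freqs", torch.stack([torch.randn(2, num_freqs) * s for s in scales])
        )
        self.mlp = nn.Sequential(
            nn.Linear(2 * num_freqs * len(scales), embed_dim),
            nn.ReLU(),
            nn.Linear(embed_dim, embed_dim),
        )

    def forward(self, gps):  # gps: (B, 2) normalized (lat, lon)
        feats = []
        for W in self.freqs:               # (2, num_freqs) at each scale
            proj = gps @ W                 # (B, num_freqs)
            feats += [torch.sin(proj), torch.cos(proj)]
        return self.mlp(torch.cat(feats, dim=-1))

Embeddings from such an encoder could then be aligned with CLIP image embeddings via a symmetric contrastive (InfoNCE) loss, which is the CLIP-style alignment the title refers to.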
Modi, Rajat; Vineet, Vibhav; Rawat, Yogesh Singh
On Occlusions in Video Action Detection: Benchmark Datasets And Training Recipes Conference
NeurIPS 2023, 2023.
Abstract | Tags: NeurIPS | Links:
@conference{Modi2023,
title = {On Occlusions in Video Action Detection: Benchmark Datasets And Training Recipes},
author = {Rajat Modi and Vibhav Vineet and Yogesh Singh Rawat},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/nips_23.pdf},
year = {2023},
date = {2023-12-10},
publisher = {NeurIPS 2023},
abstract = {This paper explores the impact of occlusions in video action detection. We facilitate this study by introducing five new benchmark datasets namely O-UCF and O-JHMDB consisting of synthetically controlled static/dynamic occlusions, OVIS-UCF and OVIS-JHMDB consisting of occlusions with realistic motions and Real-OUCF for occlusions in real-world scenarios. We formally confirm an intuitive expectation: existing models suffer a lot as occlusion severity is increased and exhibit different behaviours when occluders are static vs when they are moving. We discover several curious phenomena emerging in neural nets: 1) transformers can naturally outperform CNN models which might have even used occlusion as a form of data augmentation during training 2) incorporating symbolic-components like capsules to such backbones allows them to bind to occluders never even seen during training and 3) Islands of agreement (similar to the ones hypothesized in Hinton et al.’s GLOM) can emerge in realistic images/videos without instance-level supervision, distillation or contrastive-based objectives (e.g. video-textual training). Such emergent properties allow us to derive simple yet effective training recipes which lead to robust occlusion models inductively satisfying the first two stages of the binding mechanism (grouping/segregation). Models leveraging these recipes outperform existing video action-detectors under occlusion by 32.3% on O-UCF, 32.7% on O-JHMDB & 2.6% on Real-OUCF in terms of the vMAP metric.},
keywords = {NeurIPS},
pubstate = {published},
tppubtype = {conference}
}
Kini, Jyoti; Khan, Fahad Shahbaz; Khan, Salman; Shah, Mubarak
CT-VOS: Cutout Prediction and Tagging for Self-Supervised Video Object Segmentation Journal Article
In: Computer Vision and Image Understanding, 2023.
Tags: CVIU, Video Object Segmentation
@article{Kini2023c,
title = {CT-VOS: Cutout Prediction and Tagging for Self-Supervised Video Object Segmentation},
author = {Jyoti Kini and Fahad Shahbaz Khan and Salman Khan and Mubarak Shah},
year = {2023},
date = {2023-10-09},
journal = {Computer Vision and Image Understanding},
keywords = {CVIU, Video Object Segmentation},
pubstate = {published},
tppubtype = {article}
}
Hanif, Asif; Naseer, Muzammal; Khan, Salman; Shah, Mubarak; Khan, Fahad Shahbaz
Frequency Domain Adversarial Training for Robust Volumetric Medical Segmentation Conference
The 26th International Conference on Medical Image Computing and Computer Assisted Intervention, MICCAI 2023, 2023.
Tags: MICCAI | Links:
@conference{nokey,
title = {Frequency Domain Adversarial Training for Robust Volumetric Medical Segmentation},
author = {Asif Hanif and Muzammal Naseer and Salman Khan and Mubarak Shah and Fahad Shahbaz Khan},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Frequency-Domain-Adversarial-Training-for-Robust-Volumetric-Medical-Segmentation.pdf
https://github.com/asif-hanif/vafa},
doi = {https://doi.org/10.48550/arXiv.2307.07269},
year = {2023},
date = {2023-10-08},
publisher = {The 26th International Conference on Medical Image Computing and Computer Assisted Intervention, MICCAI 2023},
keywords = {MICCAI},
pubstate = {published},
tppubtype = {conference}
}
Li, Ming; Wu, Jie; Wang, Xionghui; Chen, Chen; Qin, Jie; Xiao, Xuefeng; Wang, Rui; Zheng, Min; Pan, Xin
AlignDet: Aligning Pre-training and Fine-tuning in Object Detection Conference
IEEE/CVF International Conference on Computer Vision, 2023.
Tags: ICCV | Links:
@conference{Li2023,
title = {AlignDet: Aligning Pre-training and Fine-tuning in Object Detection},
author = {Ming Li and Jie Wu and Xionghui Wang and Chen Chen and Jie Qin and Xuefeng Xiao and Rui Wang and Min Zheng and Xin Pan},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/2307.11077.pdf
https://arxiv.org/abs/2307.11077
https://github.com/liming-ai/AlignDet
https://openreview.net/forum?id=8PA2nX9v_r2
https://liming-ai.github.io/AlignDet/},
year = {2023},
date = {2023-10-02},
urldate = {2023-10-02},
publisher = {IEEE/CVF International Conference on Computer Vision},
keywords = {ICCV},
pubstate = {published},
tppubtype = {conference}
}
Peng, Qucheng; Zheng, Ce; Chen, Chen
Source-free Domain Adaptive Human Pose Estimation Conference
IEEE/CVF International Conference on Computer Vision, 2023.
Abstract | Tags: ICCV | Links:
@conference{Peng2023,
title = {Source-free Domain Adaptive Human Pose Estimation},
author = {Qucheng Peng and Ce Zheng and Chen Chen},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/2308.03202.pdf
https://arxiv.org/abs/2308.03202
https://github.com/davidpengucf/SFDAHPE},
year = {2023},
date = {2023-10-02},
urldate = {2023-10-02},
publisher = {IEEE/CVF International Conference on Computer Vision},
abstract = {Human Pose Estimation (HPE) is widely used in various fields, including motion analysis, healthcare, and virtual reality. However, the great expenses of labeled real-world datasets present a significant challenge for HPE. To overcome this, one approach is to train HPE models on synthetic datasets and then perform domain adaptation (DA) on real-world data. Unfortunately, existing DA methods for HPE neglect data privacy and security by using both source and target data in the adaptation process. To this end, we propose a new task, named source-free domain adaptive HPE, which aims to address the challenges of cross-domain learning of HPE without access to source data during the adaptation process. We further propose a novel framework that consists of three models: source model, intermediate model, and target model, which explores the task from both source-protect and target-relevant perspectives. The source-protect module preserves source information more effectively while resisting noise, and the target-relevant module reduces the sparsity of spatial representations by building a novel spatial probability space, and pose-specific contrastive learning and information maximization are proposed on the basis of this space. Comprehensive experiments on several domain adaptive HPE benchmarks show that the proposed method outperforms existing approaches by a considerable margin.},
keywords = {ICCV},
pubstate = {published},
tppubtype = {conference}
}
Sun, Guangyu; Mendieta, Matias; Chen, Chen
FedPerfix: Towards Partial Model Personalization of Vision Transformers in Federated Learning Conference
IEEE/CVF International Conference on Computer Vision, 2023.
Abstract | Tags: ICCV | Links:
@conference{nokey,
title = {FedPerfix: Towards Partial Model Personalization of Vision Transformers in Federated Learning},
author = {Guangyu Sun and Matias Mendieta and Chen Chen},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/2308.09160.pdf
https://arxiv.org/abs/2308.09160
https://github.com/imguangyu/FedPerfix},
year = {2023},
date = {2023-10-02},
urldate = {2023-10-02},
publisher = {IEEE/CVF International Conference on Computer Vision},
abstract = {We propose and analyze a general framework of federated learning with partial model personalization. Compared with full model personalization, partial model personalization relies on domain knowledge to select a small portion of the model to personalize, thus imposing a much smaller on-device memory footprint. We propose two federated optimization algorithms for training partially personalized models, where the shared and personal parameters are updated either simultaneously or alternately on each device, but only the shared parameters are communicated and aggregated at the server. We give convergence analyses of both algorithms for minimizing smooth nonconvex functions, providing theoretical support of them for training deep learning models. Our experiments on real-world image and text datasets demonstrate that (a) partial model personalization can obtain most of the benefit of full model personalization with a small fraction of personalized parameters, and, (b) the alternating update algorithm often outperforms the simultaneous update algorithm.},
keywords = {ICCV},
pubstate = {published},
tppubtype = {conference}
}
Luo, Jun; Mendieta, Matias; Chen, Chen
PGFed: Personalize Each Client's Global Objective for Federated Learning Conference
IEEE/CVF International Conference on Computer Vision, 2023.
Abstract | Tags: ICCV | Links:
@conference{nokey,
title = {PGFed: Personalize Each Client's Global Objective for Federated Learning},
author = {Jun Luo and Matias Mendieta and Chen Chen},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/2212.01448.pdf
https://github.com/ljaiverson/pgfed},
year = {2023},
date = {2023-10-02},
urldate = {2023-10-02},
publisher = {IEEE/CVF International Conference on Computer Vision},
abstract = {The mediocre performance of conventional federated learning (FL) over heterogeneous data has been facilitating personalized FL solutions, where, unlike conventional FL which trains a single global consensus model, different models are allowed for different clients. However, in most existing personalized FL algorithms, the collaborative knowledge across the federation was only implicitly passed to the clients in ways such as model aggregation or regularization. We observed that this implicit knowledge transfer fails to maximize the potential value of each client's empirical risk toward other clients. Based on our observation, in this work, we propose Personalized Global Federated Learning (PGFed), a novel personalized FL framework that enables each client to personalize its own global objective by explicitly and adaptively aggregating the empirical risks of itself and other clients. To avoid massive (O(N^2)) communication overhead and potential privacy leakage, each client's risk is estimated through a first-order approximation for other clients' adaptive risk aggregation. On top of PGFed, we develop a momentum upgrade, dubbed PGFedMo, to more efficiently utilize clients' empirical risks. Our extensive experiments under different federated settings with benchmark datasets show consistent improvements of PGFed over the compared state-of-the-art alternatives.},
keywords = {ICCV},
pubstate = {published},
tppubtype = {conference}
}
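The key idea in the abstract, estimating other clients' empirical risks through a first-order approximation so each client can personalize its own global objective, can be sketched as follows. The interface (flat parameter vectors, server-cached losses and gradients) is an illustrative assumption, not the paper's implementation.

import torch

def personalized_objective(local_loss, w, cached, alphas):
    """Hedged sketch of a PGFed-style personalized global objective.

    `cached` holds (loss_j, grad_j, w_j) tuples recorded from other clients;
    each other client's risk at w is estimated by a first-order expansion,
    avoiding O(N^2) communication. Flat parameter vectors are assumed.
    """
    obj = local_loss(w)
    for (loss_j, grad_j, w_j), a in zip(cached, alphas):
        # f_j(w) ~= f_j(w_j) + <grad f_j(w_j), w - w_j>
        obj = obj + a * (loss_j + torch.dot(grad_j, w - w_j))
    return obj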
Deng, Andong; Yang, Taojiannan; Chen, Chen
A Large-scale Study of Spatiotemporal Representation Learning with a New Benchmark on Action Recognition Conference
IEEE/CVF International Conference on Computer Vision, 2023.
Abstract | Tags: ICCV | Links:
@conference{nokey,
title = {A Large-scale Study of Spatiotemporal Representation Learning with a New Benchmark on Action Recognition},
author = {Andong Deng and Taojiannan Yang and Chen Chen},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/2303.13505.pdf
https://arxiv.org/abs/2303.13505
https://github.com/AndongDeng/BEAR},
year = {2023},
date = {2023-10-02},
urldate = {2023-10-02},
publisher = {IEEE/CVF International Conference on Computer Vision},
abstract = {The goal of building a benchmark (suite of datasets) is to provide a unified protocol for fair evaluation and thus facilitate the evolution of a specific area. Nonetheless, we point out that existing protocols of action recognition could yield partial evaluations due to several limitations. To comprehensively probe the effectiveness of spatiotemporal representation learning, we introduce BEAR, a new BEnchmark on video Action Recognition. BEAR is a collection of 18 video datasets grouped into 5 categories (anomaly, gesture, daily, sports, and instructional), which covers a diverse set of real-world applications. With BEAR, we thoroughly evaluate 6 common spatiotemporal models pre-trained by both supervised and self-supervised learning. We also report transfer performance via standard finetuning, few-shot finetuning, and unsupervised domain adaptation. Our observation suggests that current state-of-the-art cannot solidly guarantee high performance on datasets close to real-world applications, and we hope BEAR can serve as a fair and challenging evaluation benchmark to gain insights on building next-generation spatiotemporal learners. Our dataset, code, and models are released at: https://github.com/AndongDeng/BEAR},
keywords = {ICCV},
pubstate = {published},
tppubtype = {conference}
}
Mendieta, Matias; Chen, Chen
Towards Geospatial Foundation Models via Continual Pretraining Conference
IEEE/CVF International Conference on Computer Vision, 2023.
Abstract | Tags: ICCV | Links:
@conference{nokey,
title = {Towards Geospatial Foundation Models via Continual Pretraining},
author = {Matias Mendieta and Chen Chen},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/2302.04476.pdf
https://arxiv.org/abs/2302.04476
https://github.com/mmendiet/GFM},
year = {2023},
date = {2023-10-02},
urldate = {2023-10-02},
publisher = {IEEE/CVF International Conference on Computer Vision},
abstract = {Geospatial technologies are becoming increasingly essential in our world for a wide range of applications, including agriculture, urban planning, and disaster response. To help improve the applicability and performance of deep learning models on these geospatial tasks, various works have begun investigating foundation models for this domain. Researchers have explored two prominent approaches for introducing such models in geospatial applications, but both have drawbacks in terms of limited performance benefit or prohibitive training cost. Therefore, in this work, we propose a novel paradigm for building highly effective geospatial foundation models with minimal resource cost and carbon impact. We first construct a compact yet diverse dataset from multiple sources to promote feature diversity, which we term GeoPile. Then, we investigate the potential of continual pretraining from large-scale ImageNet-22k models and propose a multi-objective continual pretraining paradigm, which leverages the strong representations of ImageNet while simultaneously providing the freedom to learn valuable in-domain features. Our approach outperforms previous state-of-the-art geospatial pretraining methods in an extensive evaluation on seven downstream datasets covering various tasks such as change detection, classification, multi-label classification, semantic segmentation, and super-resolution.},
keywords = {ICCV},
pubstate = {published},
tppubtype = {conference}
}
Chen, Hao; Qu, Chenyuan; Zhang, Yu; Chen, Chen; Jiao, Jianbo
Multi-view Self-supervised Disentanglement for General Image Denoising Conference
IEEE/CVF International Conference on Computer Vision, 2023.
Abstract | Tags: ICCV | Links:
@conference{nokey,
title = {Multi-view Self-supervised Disentanglement for General Image Denoising},
author = {Hao Chen and Chenyuan Qu and Yu Zhang and Chen Chen and Jianbo Jiao},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/ICCV2023_MeD_Final_Version.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/ICCV2023_MeD_Supplymentary_Final_Version.pdf
https://chqwer2.github.io/MeD/
https://github.com/chqwer2/Multi-view-Self-supervised-Disentanglement-Denoising},
year = {2023},
date = {2023-10-02},
urldate = {2023-10-02},
publisher = {IEEE/CVF International Conference on Computer Vision},
abstract = {With its significant performance improvements, the deep learning paradigm has become a standard tool for modern image denoisers. While promising performance has been shown on seen noise distributions, existing approaches often suffer from generalisation to unseen noise types or general and real noise. It is understandable as the model is designed to learn paired mapping (e.g. from a noisy image to its clean version). In this paper, we instead propose to learn to disentangle the noisy image, under the intuitive assumption that different corrupted versions of the same clean image share a common latent space. A self-supervised learning framework is proposed to achieve the goal, without looking at the latent clean image. By taking two different corrupted versions of the same image as input, the proposed Multi-view Self-supervised Disentanglement (MeD) approach learns to disentangle the latent clean features from the corruptions and recover the clean image consequently. Extensive experimental analysis on both synthetic and real noise shows the superiority of the proposed method over prior self-supervised approaches, especially on unseen novel noise types. On real noise, the proposed method even outperforms its supervised counterparts by over 3 dB.},
keywords = {ICCV},
pubstate = {published},
tppubtype = {conference}
}
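A minimal sketch of the two-view disentanglement idea described above, assuming separate clean/noise encoders and a decoder that recomposes them; the actual MeD objectives may differ.

import torch.nn.functional as F

def med_style_losses(enc_clean, enc_noise, dec, y1, y2):
    """Sketch of two-view self-supervised disentanglement (interfaces assumed).

    y1, y2 are two corruptions of the same unseen clean image; the clean
    latent should agree across views, and recomposing one view's noise with
    the other view's clean latent should reconstruct that view.
    """
    c1, n1 = enc_clean(y1), enc_noise(y1)
    c2, n2 = enc_clean(y2), enc_noise(y2)
    consistency = F.mse_loss(c1, c2)                      # shared clean latent
    cross = F.mse_loss(dec(c2, n1), y1) + F.mse_loss(dec(c1, n2), y2)
    return consistency + cross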
Li, Lijun; Tian, Linrui; Zhang, Xindi; Wang, Qi; Zhang, Bang; Bo, Liefeng; Liu, Mengyuan; Chen, Chen
RenderIH: A large-scale synthetic dataset for 3D interacting hand pose estimation Conference
IEEE/CVF International Conference on Computer Vision, 2023.
Tags: ICCV
@conference{nokey,
title = {RenderIH: A large-scale synthetic dataset for 3D interacting hand pose estimation},
author = {Lijun Li and Linrui Tian and Xindi Zhang and Qi Wang and Bang Zhang and Liefeng Bo and Mengyuan Liu and Chen Chen},
year = {2023},
date = {2023-10-02},
urldate = {2023-10-02},
publisher = {IEEE/CVF International Conference on Computer Vision},
keywords = {ICCV},
pubstate = {published},
tppubtype = {conference}
}
Vahidian, Saeed; Kadaveru, Sreevatsank; Baek, Woonjoon; Wang, Weijia; Kungurtsev, Vyacheslav; Chen, Chen; Shah, Mubarak; Lin, Bill
When Do Curricula Work in Federated Learning? Conference
IEEE/CVF International Conference on Computer Vision, 2023.
Abstract | Tags: ICCV | Links:
@conference{Vahidian2023b,
title = {When Do Curricula Work in Federated Learning? },
author = {Saeed Vahidian and Sreevatsank Kadaveru and Woonjoon Baek and Weijia Wang and Vyacheslav Kungurtsev and Chen Chen and Mubarak Shah and Bill Lin},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/2212.12712.pdf
https://arxiv.org/abs/2212.12712},
doi = {https://doi.org/10.48550/arXiv.2212.12712},
year = {2023},
date = {2023-10-02},
urldate = {2023-10-02},
publisher = {IEEE/CVF International Conference on Computer Vision},
abstract = {An oft-cited open problem of federated learning is the existence of data heterogeneity at the clients. One pathway to understanding the drastic accuracy drop in federated learning is by scrutinizing the behavior of the clients' deep models on data with different levels of "difficulty", which has been left unaddressed. In this paper, we investigate a different and rarely studied dimension of FL: ordered learning. Specifically, we aim to investigate how ordered learning principles can contribute to alleviating the heterogeneity effects in FL. We present theoretical analysis and conduct extensive empirical studies on the efficacy of orderings spanning three kinds of learning: curriculum, anti-curriculum, and random curriculum. We find that curriculum learning largely alleviates non-IIDness. Interestingly, the more disparate the data distributions across clients the more they benefit from ordered learning. We provide analysis explaining this phenomenon, specifically indicating how curriculum training appears to make the objective landscape progressively less convex, suggesting fast converging iterations at the beginning of the training procedure. We derive quantitative results of convergence for both convex and nonconvex objectives by modeling the curriculum training on federated devices as local SGD with locally biased stochastic gradients. Also, inspired by ordered learning, we propose a novel client selection technique that benefits from the real-world disparity in the clients. Our proposed approach to client selection has a synergic effect when applied together with ordered learning in FL.},
keywords = {ICCV},
pubstate = {published},
tppubtype = {conference}
}
Samarasinghe, Sarinda; Rizve, Mamshad Nayeem; Kardan, Navid; Shah, Mubarak
CDFSL-V: Cross-Domain Few-Shot Learning for Videos Conference
IEEE/CVF International Conference on Computer Vision, 2023.
Abstract | Tags: ICCV | Links:
@conference{Samarasinghe2023,
title = {CDFSL-V: Cross-Domain Few-Shot Learning for Videos},
author = {Sarinda Samarasinghe and Mamshad Nayeem Rizve and Navid Kardan and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/CDFSL_Video_Combined_Final.pdf
https://sarinda251.github.io/CDFSL-V-site/
https://www.youtube.com/watch?v=RdlEzfW013o},
year = {2023},
date = {2023-10-02},
urldate = {2023-10-02},
publisher = {IEEE/CVF International Conference on Computer Vision},
abstract = {Few-shot video action recognition is an effective approach to recognizing new categories with only a few labeled examples, thereby reducing the challenges associated with collecting and annotating large-scale video datasets. Existing methods in video action recognition rely on large labeled datasets from the same domain. However, this setup is not realistic as novel categories may come from different data domains that may have different spatial and temporal characteristics. This dissimilarity between the source and target domains can pose a significant challenge, rendering traditional few-shot action recognition techniques ineffective. To address this issue, in this work, we propose a novel cross-domain few-shot video action recognition method that leverages self-supervised learning and curriculum learning to balance the information from the source and target domains. In particular, our method employs a masked autoencoder-based self-supervised training objective to learn from both source and target data in a self-supervised manner. Then a progressive curriculum balances learning the discriminative information from the source dataset with the generic information learned from the target domain. Initially, our curriculum utilizes supervised learning to learn class discriminative features from the source data. As the training progresses, we transition to learning target-domain-specific features. We propose a progressive curriculum to encourage the emergence of rich features in the target domain based on class discriminative supervised features in the source domain. We evaluate our method on several challenging benchmark datasets and demonstrate that our approach outperforms existing cross-domain few-shot learning techniques.},
keywords = {ICCV},
pubstate = {published},
tppubtype = {conference}
}
Sirnam, Swetha; Rizve, Mamshad Nayeem; Kuehne, Hilde; Shah, Mubarak
Preserving Modality Structure Improves Multi-Modal Learning Conference
IEEE/CVF International Conference on Computer Vision, 2023.
Abstract | Tags: ICCV | Links:
@conference{nokey,
title = {Preserving Modality Structure Improves Multi-Modal Learning },
author = {Swetha Sirnam and Mamshad Nayeem Rizve and Hilde Kuehne and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/2308.13077.pdf
https://arxiv.org/abs/2308.13077
https://github.com/Swetha5/Multi_Sinkhorn_Knopp
https://swetha5.github.io/MultiSK/
https://youtu.be/1CrGkUATy50
},
doi = {https://doi.org/10.48550/arXiv.2308.13077},
year = {2023},
date = {2023-10-02},
urldate = {2023-10-02},
publisher = {IEEE/CVF International Conference on Computer Vision},
abstract = {Self-supervised learning on large-scale multi-modal datasets allows learning semantically meaningful embeddings in a joint multi-modal representation space without relying on human annotations. These joint embeddings enable zero-shot cross-modal tasks like retrieval and classification. However, these methods often struggle to generalize well on out-of-domain data as they ignore the semantic structure present in modality-specific embeddings. In this context, we propose a novel Semantic-Structure-Preserving Consistency approach to improve generalizability by preserving the modality-specific relationships in the joint embedding space. To capture modality-specific semantic relationships between samples, we propose to learn multiple anchors and represent the multifaceted relationship between samples with respect to their relationship with these anchors. To assign multiple anchors to each sample, we propose a novel Multi-Assignment Sinkhorn-Knopp algorithm. Our experimentation demonstrates that our proposed approach learns semantically meaningful anchors in a self-supervised manner. Furthermore, our evaluation on MSR-VTT and YouCook2 datasets demonstrates that our proposed multi-anchor assignment based solution achieves state-of-the-art performance and generalizes to both in- and out-of-domain datasets. Code: https://github.com/Swetha5/Multi_Sinkhorn_Knopp},
keywords = {ICCV},
pubstate = {published},
tppubtype = {conference}
}
Liu, Daochang; Li, Qiyue; Dinh, Anh-Dung; Jiang, Tingting; Shah, Mubarak; Xu, Chang
Diffusion Action Segmentation Conference
IEEE/CVF International Conference on Computer Vision, 2023.
Abstract | Tags: ICCV | Links:
@conference{Liu2023b,
title = {Diffusion Action Segmentation},
author = {Daochang Liu and Qiyue Li and Anh-Dung Dinh and Tingting Jiang and Mubarak Shah and Chang Xu},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/2303.17959.pdf
https://arxiv.org/abs/2303.17959
https://finspire13.github.io/DiffAct-Project-Page/
https://github.com/Finspire13/DiffAct
https://youtu.be/o_Jp8shth7U
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Slides.pptx},
doi = {https://doi.org/10.48550/arXiv.2303.17959},
year = {2023},
date = {2023-10-02},
urldate = {2023-10-02},
publisher = {IEEE/CVF International Conference on Computer Vision},
abstract = {Temporal action segmentation is crucial for understanding long-form videos. Previous works on this task commonly adopt an iterative refinement paradigm by using multi-stage models. We propose a novel framework via denoising diffusion models, which nonetheless shares the same inherent spirit of such iterative refinement. In this framework, action predictions are iteratively generated from random noise with input video features as conditions. To enhance the modeling of three striking characteristics of human actions, including the position prior, the boundary ambiguity, and the relational dependency, we devise a unified masking strategy for the conditioning inputs in our framework. Extensive experiments on three benchmark datasets, i.e., GTEA, 50Salads, and Breakfast, are performed and the proposed method achieves superior or comparable results to state-of-the-art methods, showing the effectiveness of a generative approach for action segmentation.},
keywords = {ICCV},
pubstate = {published},
tppubtype = {conference}
}
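The iterative-refinement view in the abstract, generating action predictions from random noise conditioned on video features, can be sketched with a toy sampler. The model interface and the interpolation update are placeholders, not the released DiffAct sampler.

import torch

@torch.no_grad()
def diffusion_segment(model, video_feats, num_classes, steps=25):
    """Toy sampler for diffusion-style action segmentation (not DiffAct's).

    Starts from Gaussian noise over per-frame action logits and iteratively
    refines them conditioned on video features; `model(x_t, t, cond)` is an
    assumed interface that predicts the clean action sequence.
    """
    num_frames = video_feats.shape[0]
    x = torch.randn(num_frames, num_classes)      # pure-noise action sequence
    for t in reversed(range(steps)):
        x0_hat = model(x, torch.tensor([t]), video_feats)
        keep = t / steps                          # crude schedule: keep less noise
        x = keep * x + (1 - keep) * x0_hat
    return x.argmax(dim=-1)                       # per-frame action labels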
Fioresi, Joseph; Dave, Ishan; Shah, Mubarak
TeD-SPAD: Temporal Distinctiveness for Self-supervised Privacy-preservation for video Anomaly Detection Conference
IEEE/CVF International Conference on Computer Vision, 2023.
Tags: ICCV | Links:
@conference{Fioresi2023,
title = {TeD-SPAD: Temporal Distinctiveness for Self-supervised Privacy-preservation for video Anomaly Detection},
author = {Joseph Fioresi and Ishan Dave and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/2308.11072.pdf
https://arxiv.org/abs/2308.11072
https://github.com/UCF-CRCV/TeD-SPAD
https://joefioresi718.github.io/TeD-SPAD_webpage/
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/TeDSPAD_ICCV_poster.pdf
https://youtu.be/3a9qeJUD1GU},
year = {2023},
date = {2023-10-02},
urldate = {2023-10-02},
publisher = {IEEE/CVF International Conference on Computer Vision},
keywords = {ICCV},
pubstate = {published},
tppubtype = {conference}
}
Wasim, Syed Talal; Khattak, Muhammad Uzair; Naseer, Muzammal; Khan, Salman; Shah, Mubarak; Khan, Fahad Shahbaz
Video-FocalNets: Spatio-Temporal Focal Modulation for Video Action Recognition Conference
IEEE/CVF International Conference on Computer Vision, 2023.
Abstract | Tags: ICCV | Links:
@conference{nokey,
title = {Video-FocalNets: Spatio-Temporal Focal Modulation for Video Action Recognition },
author = {Syed Talal Wasim and Muhammad Uzair Khattak and Muzammal Naseer and Salman Khan and Mubarak Shah and Fahad Shahbaz Khan },
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/2307.06947.pdf
https://arxiv.org/abs/2307.06947
https://talalwasim.github.io/Video-FocalNets/
https://github.com/TalalWasim/Video-FocalNets
https://talalwasim.github.io/Video-FocalNets/#BibTeX},
doi = {https://doi.org/10.48550/arXiv.2307.06947},
year = {2023},
date = {2023-10-02},
urldate = {2023-10-02},
publisher = {IEEE/CVF International Conference on Computer Vision},
abstract = {Recent video recognition models utilize Transformer models for long-range spatio-temporal context modeling. Video transformer designs are based on self-attention that can model global context at a high computational cost. In comparison, convolutional designs for videos offer an efficient alternative but lack long-range dependency modeling. Towards achieving the best of both designs, this work proposes Video-FocalNet, an effective and efficient architecture for video recognition that models both local and global contexts. Video-FocalNet is based on a spatio-temporal focal modulation architecture that reverses the interaction and aggregation steps of self-attention for better efficiency. Further, the aggregation step and the interaction step are both implemented using efficient convolution and element-wise multiplication operations that are computationally less expensive than their self-attention counterparts on video representations. We extensively explore the design space of focal modulation-based spatio-temporal context modeling and demonstrate our parallel spatial and temporal encoding design to be the optimal choice. Video-FocalNets perform favorably well against the state-of-the-art transformer-based models for video recognition on three large-scale datasets (Kinetics-400, Kinetics-600, and SS-v2) at a lower computational cost. Our code/models are publicly released.},
keywords = {ICCV},
pubstate = {published},
tppubtype = {conference}
}
Jain, Nishant; Behl, Harkirat; Rawat, Yogesh Singh; Vineet, Vibhav
Efficiently Robustify Pre-Trained Models Conference
IEEE/CVF International Conference on Computer Vision, 2023.
Abstract | Tags: ICCV | Links:
@conference{nokey,
title = {Efficiently Robustify Pre-Trained Models},
author = {Nishant Jain and Harkirat Behl and Yogesh Singh Rawat and Vibhav Vineet},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/ICCV23_Robust_Learning.pdf},
year = {2023},
date = {2023-10-02},
publisher = {IEEE/CVF International Conference on Computer Vision},
abstract = {A recent trend in deep learning algorithms has been towards training large scale models, having high parameter count and trained on big datasets. However, robustness of such large scale models towards real-world settings is still a less-explored topic. In this work, we first benchmark the performance of these models under different perturbations and datasets thereby representing real-world shifts, and highlight their degrading performance under these shifts. We then discuss how existing robustification schemes based on complete model fine-tuning might not be a scalable option for very large scale networks and can also lead them to forget some of the desired characteristics. Finally, we propose a simple and cost-effective method to solve this problem, inspired by knowledge transfer literature. It involves robustifying smaller models, at a lower computation cost, and then using them as teachers to tune a fraction of these large scale networks, reducing the overall computational overhead. We evaluate our proposed method under various vision perturbations including ImageNet-C,R,S,A datasets and also for transfer learning, zero-shot evaluation setups on different datasets. Benchmark results show that our method is able to induce robustness to these large scale models efficiently, requiring significantly less time and also preserves the transfer learning, zero-shot properties of the original model which none of the existing methods are able to achieve.},
keywords = {ICCV},
pubstate = {published},
tppubtype = {conference}
}
Zhou, Yifei; Li, Zilu; Shrivastava, Abhinav; Zhao, Hengshuang; Torralba, Antonio; Tian, Taipeng; Lim, Ser-Nam
BT^2: Backward-compatible Training with Basis Transformation Conference
IEEE/CVF International Conference on Computer Vision, 2023.
Abstract | Tags: ICCV | Links:
@conference{Zhou2023,
title = {BT^2: Backward-compatible Training with Basis Transformation},
author = {Yifei Zhou and Zilu Li and Abhinav Shrivastava and Hengshuang Zhao and Antonio Torralba and Taipeng Tian and Ser-Nam Lim},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/2211.03989v3.pdf
https://arxiv.org/abs/2211.03989v3},
doi = {https://doi.org/10.48550/arXiv.2211.03989},
year = {2023},
date = {2023-10-02},
urldate = {2023-10-02},
publisher = {IEEE/CVF International Conference on Computer Vision},
abstract = {Modern retrieval systems often require recomputing the representation of every piece of data in the gallery when updating to a better representation model. This process is known as backfilling and can be especially costly in the real world where the gallery often contains billions of samples. Recently, researchers have proposed the idea of Backward Compatible Training (BCT) where the new representation model can be trained with an auxiliary loss to make it backward compatible with the old representation. In this way, the new representation can be directly compared with the old representation, in principle avoiding the need for any backfilling. However, follow-up work shows that there is an inherent tradeoff where a backward compatible representation model cannot simultaneously maintain the performance of the new model itself. This paper reports our ``not-so-surprising'' finding that adding extra dimensions to the representation can help here. However, we also found that naively increasing the dimension of the representation did not work. To deal with this, we propose Backward-compatible Training with a novel Basis Transformation (BT2). A basis transformation (BT) is basically a learnable set of parameters that applies an orthonormal transformation. Such a transformation possesses an important property whereby the original information contained in its input is retained in its output. We show in this paper how a BT can be utilized to add only the necessary amount of additional dimensions. We empirically verify the advantage of BT2 over other state-of-the-art methods in a wide range of settings. We then further extend BT2 to other challenging yet more practical settings, including significant change in model architecture (CNN to Transformers), modality change, and even a series of updates in the model architecture mimicking the evolution of deep learning models.},
keywords = {ICCV},
pubstate = {published},
tppubtype = {conference}
}
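The basis-transformation idea in the abstract, adding a few dimensions and applying a learnable orthonormal transform so the original information is retained, can be sketched with PyTorch's orthogonal parametrization. Dimensions and the zero-padding scheme are illustrative assumptions.

import torch
import torch.nn as nn
from torch.nn.utils import parametrizations

class BasisTransform(nn.Module):
    """Sketch: extra dimensions plus a learnable orthonormal map.

    Orthogonal transforms are invertible and norm-preserving, so the
    information in the padded old embedding is retained in the output.
    """
    def __init__(self, old_dim=512, extra_dim=64):
        super().__init__()
        d = old_dim + extra_dim
        self.extra_dim = extra_dim
        # Constrain the linear map to remain orthogonal during training.
        self.transform = parametrizations.orthogonal(nn.Linear(d, d, bias=False))

    def forward(self, old_embedding):             # (B, old_dim)
        pad = old_embedding.new_zeros(old_embedding.size(0), self.extra_dim)
        return self.transform(torch.cat([old_embedding, pad], dim=-1))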
Chen, Xi; Li, Shuang; Lim, Ser-Nam; Torralba, Antonio; Zhao, Hengshuang
Open-vocabulary Panoptic Segmentation with Embedding Modulation Conference
IEEE/CVF International Conference on Computer Vision, 2023.
Abstract | Tags: ICCV | Links:
@conference{Chen2023b,
title = {Open-vocabulary Panoptic Segmentation with Embedding Modulation},
author = {Xi Chen and Shuang Li and Ser-Nam Lim and Antonio Torralba and Hengshuang Zhao},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/2303.11324.pdf
https://arxiv.org/abs/2303.11324
https://opsnet-page.github.io/},
doi = {https://doi.org/10.48550/arXiv.2303.11324},
year = {2023},
date = {2023-10-02},
urldate = {2023-10-02},
publisher = {IEEE/CVF International Conference on Computer Vision},
abstract = {Open-vocabulary image segmentation is attracting increasing attention due to its critical applications in the real world. Traditional closed-vocabulary segmentation methods are not able to characterize novel objects, whereas several recent open-vocabulary attempts obtain unsatisfactory results, i.e., notable performance reduction on the closed vocabulary and massive demand for extra data. To this end, we propose OPSNet, an omnipotent and data-efficient framework for Open-vocabulary Panoptic Segmentation. Specifically, the exquisitely designed Embedding Modulation module, together with several meticulous components, enables adequate embedding enhancement and information exchange between the segmentation model and the visual-linguistic well-aligned CLIP encoder, resulting in superior segmentation performance under both open- and closed-vocabulary settings with much fewer need of additional data. Extensive experimental evaluations are conducted across multiple datasets (e.g., COCO, ADE20K, Cityscapes, and PascalContext) under various circumstances, where the proposed OPSNet achieves state-of-the-art results, which demonstrates the effectiveness and generality of the proposed approach. The code and trained models will be made publicly available.},
keywords = {ICCV},
pubstate = {published},
tppubtype = {conference}
}
Hammoud, Hasan Abed Al Kader; Prabhu, Ameya; Lim, Ser-Nam; Torr, Philip; Bibi, Adel; Ghanem, Bernard
Towards a True Evaluation of Rapid Adaptation in Online Continual Learning Conference
IEEE/CVF International Conference on Computer Vision, 2023.
Abstract | Tags: ICCV | Links:
@conference{Hammoud2023,
title = {Towards a True Evaluation of Rapid Adaptation in Online Continual Learning},
author = {Hasan Abed Al Kader Hammoud and Ameya Prabhu and Ser-Nam Lim and Philip Torr and Adel Bibi and Bernard Ghanem},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/2305.09275.pdf
https://arxiv.org/abs/2305.09275
https://github.com/drimpossible/EvalOCL},
doi = {https://doi.org/10.48550/arXiv.2305.09275},
year = {2023},
date = {2023-10-02},
urldate = {2023-10-02},
publisher = {IEEE/CVF International Conference on Computer Vision},
abstract = {We revisit the common practice of evaluating adaptation of Online Continual Learning (OCL) algorithms through the metric of online accuracy, which measures the accuracy of the model on the immediate next few samples. However, we show that this metric is unreliable, as even vacuous blind classifiers, which do not use input images for prediction, can achieve unrealistically high online accuracy by exploiting spurious label correlations in the data stream. Our study reveals that existing OCL algorithms can also achieve high online accuracy, but perform poorly in retaining useful information, suggesting that they unintentionally learn spurious label correlations. To address this issue, we propose a novel metric for measuring adaptation based on the accuracy on the near-future samples, where spurious correlations are removed. We benchmark existing OCL approaches using our proposed metric on large-scale datasets under various computational budgets and find that better generalization can be achieved by retaining and reusing past seen information. We believe that our proposed metric can aid in the development of truly adaptive OCL methods. We provide code to reproduce our results at https://github.com/drimpossible/EvalOCL.},
keywords = {ICCV},
pubstate = {published},
tppubtype = {conference}
}
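The proposed fix, measuring adaptation on near-future samples rather than the immediate next ones so that spurious label correlations cannot inflate the score, can be sketched as a simple metric (the predict interface and offset value are assumptions).

def near_future_accuracy(predict, stream, offset=100):
    """Sketch of near-future accuracy for online continual learning.

    `predict(t, x)` must use only the model state after training on samples
    [0, t]; scoring on the sample `offset` steps ahead removes the spurious
    short-range label correlations that inflate plain online accuracy.
    """
    hits, total = 0, 0
    for t in range(len(stream) - offset):
        x, y = stream[t + offset]
        hits += int(predict(t, x) == y)
        total += 1
    return hits / max(total, 1)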
Thawakar, Omkar; Anwer, Rao Muhammad; Laaksonen, Jorma; Reiner, Orly; Shah, Mubarak; Khan, Fahad Shahbaz
3D Mitochondria Instance Segmentation with Spatio-Temporal Transformers Conference
Lecture Notes in Computer Science, vol. 14227, Medical Image Computing and Computer Assisted Intervention – MICCAI 2023, 2023, ISBN: 978-3-031-43993-3.
Abstract | Tags: | Links:
@conference{nokey,
title = {3D Mitochondria Instance Segmentation with Spatio-Temporal Transformers},
author = {Omkar Thawakar and Rao Muhammad Anwer and Jorma Laaksonen and Orly Reiner and Mubarak Shah and Fahad Shahbaz Khan},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/2303.12073.pdf
https://github.com/OmkarThawakar/STT-UNET
https://arxiv.org/pdf/2303.12073.pdf
https://link.springer.com/chapter/10.1007/978-3-031-43993-3_59
},
doi = {https://doi.org/10.1007/978-3-031-43993-3_59},
isbn = {978-3-031-43993-3},
year = {2023},
date = {2023-10-01},
booktitle = {Lecture Notes in Computer Science},
journal = {arXiv:2303.12073},
volume = {14227},
publisher = {Medical Image Computing and Computer Assisted Intervention – MICCAI 2023},
abstract = {Accurate 3D mitochondria instance segmentation in electron microscopy (EM) is a challenging problem and serves as a prerequisite to empirically analyze their distributions and morphology. Most existing approaches employ 3D convolutions to obtain representative features. However, these convolution-based approaches struggle to effectively capture long-range dependencies in the volume mitochondria data, due to their limited local receptive field. To address this, we propose a hybrid encoder-decoder framework based on a split spatio-temporal attention module that efficiently computes spatial and temporal self-attentions in parallel, which are later fused through a deformable convolution. Further, we introduce a semantic foreground-background adversarial loss during training that aids in delineating the region of mitochondria instances from the background clutter. Our extensive experiments on three benchmarks, Lucchi, MitoEM-R and MitoEM-H, reveal the benefits of the proposed contributions achieving state-of-the-art results on all three datasets. Our code and models are available at https://github.com/OmkarThawakar/STT-UNET.},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Kini, Jyoti; Fleischer, Sarah; Dave, Ishan; Shah, Mubarak
Ensemble Modeling for Multimodal Visual Action Recognition Workshop
22nd International Conference on Image Analysis and Processing Workshops - Multimodal Action Recognition on the MECCANO Dataset, 2023.
Tags: ICIAPW, REU, Video Action Recognition | Links:
@workshop{Kini2023b,
title = {Ensemble Modeling for Multimodal Visual Action Recognition},
author = {Jyoti Kini and Sarah Fleischer and Ishan Dave and Mubarak Shah},
url = {https://arxiv.org/pdf/2308.05430.pdf
https://www.crcv.ucf.edu/research/projects/ensemble-modeling-for-multimodal-visual-action-recognition/},
year = {2023},
date = {2023-09-11},
urldate = {2023-09-11},
booktitle = {22nd International Conference on Image Analysis and Processing Workshops - Multimodal Action Recognition on the MECCANO Dataset},
keywords = {ICIAPW, REU, Video Action Recognition},
pubstate = {published},
tppubtype = {workshop}
}
Zhu, Sijie; Yang, Linjie; Chen, Chen; Shah, Mubarak; Shen, Xiaohui; Wang, Heng
R2Former: Unified retrieval and ranking Transformer for Place Recognition Conference
IEEE Computer Vision and Pattern Recognition, 2023.
Abstract | Tags: CVPR | Links:
@conference{Zhu2023,
title = {R2Former: Unified retrieval and ranking Transformer for Place Recognition},
author = {Sijie Zhu and Linjie Yang and Chen Chen and Mubarak Shah and Xiaohui Shen and Heng Wang},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/CVPR_2023_PlaceRecognitionFinal.pdf
https://arxiv.org/pdf/2304.03410.pdf
https://github.com/Jeff-Zilence/R2Former},
year = {2023},
date = {2023-06-18},
urldate = {2023-06-18},
publisher = {IEEE Computer Vision and Pattern Recognition},
abstract = {Visual Place Recognition (VPR) estimates the location of query images by matching them with images in a reference database. Conventional methods generally adopt aggregated CNN features for global retrieval and RANSAC-based geometric verification for reranking. However, RANSAC only considers geometric information but ignores other possible information that could be useful for reranking, e.g. local feature correlation, and attention values. In this paper, we propose a unified place recognition framework that handles both retrieval and reranking with a novel transformer model, named R2Former. The proposed reranking module takes feature correlation, attention value, and xy coordinates into account, and learns to determine whether the image pair is from the same location. The whole pipeline is end-to-end trainable and the reranking module alone can also be adopted on other CNN or transformer backbones as a generic component. Remarkably, R2Former significantly outperforms state-of-the-art methods on major VPR datasets with much less inference time and memory consumption. It also achieves the state-of-the-art on the holdout MSLS challenge set and could serve as a simple yet strong solution for real-world large-scale applications. Experiments also show vision transformer tokens are comparable and sometimes better than CNN local features on local matching. The code will be publicly available.},
keywords = {CVPR},
pubstate = {published},
tppubtype = {conference}
}
Gupta, Rohit; Roy, Anirban; Kim, Sujeong; Christensen, Claire; Grindal, Todd; Gerard, Sarah Nixon; Cincebeaux, Madeline; Divakaran, Ajay; Shah, Mubarak
Class Prototypes based Contrastive Learning for Classifying Multi-Label and Fine-Grained Educational Videos Conference
IEEE Computer Vision and Pattern Recognition, 2023.
Abstract | Tags: CVPR | Links:
@conference{Gupta2023b,
title = {Class Prototypes based Contrastive Learning for Classifying Multi-Label and Fine-Grained Educational Videos},
author = {Rohit Gupta and Anirban Roy and Sujeong Kim and Claire Christensen and Todd Grindal and Sarah Nixon Gerard and Madeline Cincebeaux and Ajay Divakaran and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Rohit_SRI_CVPR2023_Multi_Modal_Multi_Label_Contrastive_Learning_Camera_Ready-4.pdf
https://www.rohitg.xyz/MMContrast/
https://nusci.csl.sri.com/project/APPROVE},
year = {2023},
date = {2023-06-18},
urldate = {2023-06-18},
publisher = {IEEE Computer Vision and Pattern Recognition},
abstract = {The recent growth in the consumption of online media by children during early childhood necessitates data-driven tools enabling educators to filter out appropriate educational content for young learners. This paper presents an approach for detecting educational content in online videos. We focus on two widely used educational content classes: literacy and math. For each class, we choose prominent codes (sub-classes) based on the Common Core Standards. For example, literacy codes include ‘letter names’, ‘letter sounds’, and math codes include ‘counting’, ‘sorting’. We pose this as a fine-grained multilabel classification problem as videos can contain multiple types of educational content and the content classes can get visually similar (e.g., ‘letter names’ vs ‘letter sounds’). We propose a novel class prototypes based supervised contrastive learning approach that can handle fine-grained samples associated with multiple labels. We learn a class prototype for each class and a loss function is employed to minimize the distances between a class prototype and the samples from the class. Similarly, distances between a class prototype and the samples from other classes are maximized. As the alignment between visual and audio cues is crucial for effective comprehension, we consider a multimodal transformer network to capture the interaction between visual and audio cues in videos while learning the embedding for videos. For evaluation, we present a dataset, APPROVE, employing educational videos from YouTube labeled with fine-grained education classes by education researchers. APPROVE consists of 193 hours of expert-annotated videos with 19 classes. The proposed approach outperforms strong baselines on APPROVE and other benchmarks such as YouTube-8M and COIN. The dataset is available at https://nusci.csl.sri.com/project/APPROVE.},
keywords = {CVPR},
pubstate = {published},
tppubtype = {conference}
}
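A minimal sketch of a class-prototype contrastive loss for multi-label samples in the spirit of the abstract; the exact formulation in the paper may differ.

import torch
import torch.nn.functional as F

def prototype_contrastive_loss(embeds, prototypes, labels, temp=0.1):
    """Sketch of class-prototype contrastive learning for multi-label videos.

    embeds: (B, D) sample embeddings; prototypes: (C, D) learnable class
    prototypes; labels: (B, C) multi-hot floats. Pulls each sample toward
    the prototypes of all its classes and away from the rest.
    """
    embeds = F.normalize(embeds, dim=-1)
    prototypes = F.normalize(prototypes, dim=-1)
    log_prob = F.log_softmax(embeds @ prototypes.t() / temp, dim=-1)  # (B, C)
    pos = (labels * log_prob).sum(dim=-1) / labels.sum(dim=-1).clamp(min=1)
    return -pos.mean()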
Dave, Ishan Rajendrakumar; Rizve, Mamshad Nayeem; Chen, Chen; Shah, Mubarak
TimeBalance: Temporally-Invariant and Temporally-Distinctive Video Representations for Semi-Supervised Action Recognition Conference
IEEE Computer Vision and Pattern Recognition, 2023.
Abstract | Tags: CVPR | Links:
@conference{Dave2023,
title = {TimeBalance: Temporally-Invariant and Temporally-Distinctive Video Representations for Semi-Supervised Action Recognition},
author = {Ishan Rajendrakumar Dave and Mamshad Nayeem Rizve and Chen Chen and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/TimeBalance_CVPR23_arxiv.pdf
https://daveishan.github.io/timebalance_webpage/
https://github.com/DAVEISHAN/TimeBalance},
year = {2023},
date = {2023-06-18},
urldate = {2023-06-18},
publisher = {IEEE Computer Vision and Pattern Recognition},
abstract = {Semi-Supervised Learning can be more beneficial for the video domain compared to images because of its higher annotation cost and dimensionality. Besides, any video understanding task requires reasoning over both spatial and temporal dimensions. In order to learn both the static and motion related features for the semi-supervised action recognition task, existing methods rely on hard input inductive biases like using two-modalities (RGB and Optical-flow) or two-stream of different playback rates. Instead of utilizing unlabeled videos through diverse input streams, we rely on self-supervised video representations, particularly, we utilize temporally-invariant and temporally-distinctive representations. We observe that these representations complement each other depending on the nature of the action. Based on this observation, we propose a student-teacher semi-supervised learning framework, TimeBalance, where we distill the knowledge from a temporally-invariant and a temporally-distinctive teacher. Depending on the nature of the unlabeled video, we dynamically combine the knowledge of these two teachers based on a novel temporal similarity-based reweighting scheme. Our method achieves state-of-the-art performance on three action recognition benchmarks: UCF101, HMDB51, and Kinetics400. Code: https://github.com/DAVEISHAN/TimeBalance.},
keywords = {CVPR},
pubstate = {published},
tppubtype = {conference}
}
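The dynamic combination of the two teachers described above can be sketched as a distillation loss whose blend weight comes from a per-video temporal-similarity score; the reweighting here is a placeholder, not the paper's exact scheme.

import torch.nn.functional as F

def dual_teacher_distillation(student_logits, inv_logits, dis_logits, sim):
    """Sketch of distilling from two teachers with a dynamic blend weight.

    `sim` in [0, 1] is a per-video temporal-similarity score of shape (B,);
    the student matches a convex combination of the temporally-invariant and
    temporally-distinctive teachers' soft predictions.
    """
    w = sim.view(-1, 1)
    target = w * F.softmax(inv_logits, dim=-1) + (1 - w) * F.softmax(dis_logits, dim=-1)
    return F.kl_div(F.log_softmax(student_logits, dim=-1), target, reduction="batchmean")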
Rizve, Mamshad Nayeem; Mittal, Gaurav; Yu, Ye; Hall, Matthew; Sajeev, Sandra; Shah, Mubarak; Chen, Mei
PivoTAL: Prior-Driven Supervision for Weakly-Supervised Temporal Action Localization Conference
IEEE Computer Vision and Pattern Recognition, 2023.
Tags: CVPR | Links:
@conference{Rizve2023,
title = {PivoTAL: Prior-Driven Supervision for Weakly-Supervised Temporal Action Localization},
author = {Mamshad Nayeem Rizve and Gaurav Mittal and Ye Yu and Matthew Hall and Sandra Sajeev and Mubarak Shah and Mei Chen},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/PivoTAL_CVPR_2023.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/PivoTAL_CVPR_2023_Supplemental_Material.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/PivoTAL_CVPR2023_Poster.pdf
https://www.youtube.com/watch?v=6kAoQjXfzio},
year = {2023},
date = {2023-06-18},
urldate = {2023-06-18},
publisher = {IEEE Computer Vision and Pattern Recognition},
keywords = {CVPR},
pubstate = {published},
tppubtype = {conference}
}
Urooj, Aisha; Kuehne, Hilde; Wu, Bo; Chheu, Kim; Bousselham, Walid; Gan, Chuang; Lobo, Niels; Shah, Mubarak
Learning Situation Hyper-Graphs for Video Question Answering Conference
IEEE Computer Vision and Pattern Recognition, 2023.
Abstract | Tags: CVPR | Links:
@conference{Urooj2023,
title = {Learning Situation Hyper-Graphs for Video Question Answering},
author = {Aisha Urooj and Hilde Kuehne and Bo Wu and Kim Chheu and Walid Bousselham and Chuang Gan and Niels Lobo and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/2023072364-4.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/SHG_VQA_CVPR2023_cam_ready_supp.pdf
},
year = {2023},
date = {2023-06-18},
urldate = {2023-06-18},
publisher = {IEEE Computer Vision and Pattern Recognition},
abstract = {Answering questions about complex situations in videos requires not only capturing the presence of actors, objects, and their relations but also the evolution of these relationships over time. A situation hyper-graph is a representation that describes situations as scene sub-graphs for video frames and hyper-edges for connected sub-graphs, and has been proposed to capture all such information in a compact structured form. In this work, we propose an architecture for Video Question Answering (VQA) that enables answering questions related to video content by predicting situation hyper-graphs, coined Situation Hyper-Graph based Video Question Answering (SHG-VQA). To this end, we train a situation hyper-graph decoder to implicitly identify graph representations with actions and object/human-object relationships from the input video clip, and use cross-attention between the predicted situation hyper-graphs and the question embedding to predict the correct answer. The proposed method is trained in an end-to-end manner and optimized by a VQA loss with the cross-entropy function and a Hungarian matching loss for the situation graph prediction. The effectiveness of the proposed architecture is extensively evaluated on two challenging benchmarks: AGQA and STAR. Our results show that learning the underlying situation hyper-graphs helps the system to significantly improve its performance for novel challenges of video question-answering tasks.},
keywords = {CVPR},
pubstate = {published},
tppubtype = {conference}
}
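The training objective combines a standard VQA cross-entropy with a Hungarian matching loss over predicted graph slots. The sketch below is a simplified stand-in (classification cost only, SciPy's linear_sum_assignment for the matching, all shapes illustrative), not the authors' implementation.

import torch
import torch.nn.functional as F
from scipy.optimize import linear_sum_assignment

def hungarian_set_loss(pred_logits, target_classes):
    """Match N predicted graph slots to M ground-truth labels (M <= N) by
    minimizing classification cost, then apply cross-entropy on the matched
    pairs. A simplified stand-in for a situation-graph matching loss."""
    probs = pred_logits.softmax(-1)                    # (N, num_classes)
    cost = -probs[:, target_classes]                   # (N, M) matching cost
    row, col = linear_sum_assignment(cost.detach().cpu().numpy())
    row, col = torch.as_tensor(row), torch.as_tensor(col)
    return F.cross_entropy(pred_logits[row], target_classes[col])

# toy usage: total loss = VQA cross-entropy + graph matching loss
answer_logits = torch.randn(2, 1000)                   # batch of answer scores
answers = torch.tensor([3, 42])
slot_logits = torch.randn(8, 17, requires_grad=True)   # 8 slots, 17 classes
gt = torch.tensor([2, 5, 5])                           # 3 ground-truth elements
loss = F.cross_entropy(answer_logits, answers) + hungarian_set_loss(slot_logits, gt)
loss.backward()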
Bhunia, Ankan Kumar; Khan, Salman; Cholakkal, Hisham; Anwer, Rao Muhammad; Laaksonen, Jorma Tapio; Shah, Mubarak; Khan, Fahad
Person Image Synthesis via Denoising Diffusion Model Conference
IEEE Computer Vision and Pattern Recognition, 2023.
Abstract | Tags: CVPR | Links:
@conference{Bhunia2023,
title = {Person Image Synthesis via Denoising Diffusion Model},
author = {Ankan Kumar Bhunia and Salman Khan and Hisham Cholakkal and Rao Muhammad Anwer and Jorma Tapio Laaksonen and Mubarak Shah and Fahad Khan},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/person_image_synthesis_via_den-Camera-ready-PDF.pdf
https://lnkd.in/d-8v3r8B
https://lnkd.in/dGPTjvge
https://lnkd.in/dxcGQsUX
https://github.com/ankanbhunia/PIDM},
year = {2023},
date = {2023-06-18},
urldate = {2023-06-18},
publisher = {IEEE Computer Vision and Pattern Recognition},
abstract = {The pose-guided person image generation task requires synthesizing photorealistic images of humans in arbitrary poses. The existing approaches use generative adversarial networks that do not necessarily maintain realistic textures or need dense correspondences that struggle to handle complex deformations and severe occlusions. In this work, we show how denoising diffusion models can be applied for high-fidelity person image synthesis with strong sample diversity and enhanced mode coverage of the learnt data distribution. Our proposed Person Image Diffusion Model (PIDM) disintegrates the complex transfer problem into a series of simpler forward-backward denoising steps. This helps in learning plausible source-to-target transformation trajectories that result in faithful textures and undistorted appearance details. We introduce a ‘texture diffusion module’ based on cross-attention to accurately model the correspondences between appearance and pose information available in source and target images. Further, we propose ‘disentangled classifier-free guidance’ to ensure close resemblance between the conditional inputs and the synthesized output in terms of both pose and appearance information. Our extensive results on two large-scale benchmarks and a user study demonstrate the photorealism of our proposed approach under challenging scenarios. We also show how our generated images can help in downstream tasks. Code is available at https://github.com/ankanbhunia/PIDM.},
keywords = {CVPR},
pubstate = {published},
tppubtype = {conference}
}
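Disentangled classifier-free guidance can be illustrated in a few lines. The sketch below assumes one guidance scale per condition (pose, appearance) and three noise predictions from the same denoiser under increasing conditioning; the exact factorization used in the paper may differ.

import torch

def disentangled_cfg(eps_uncond, eps_pose, eps_pose_style, w_pose=2.0, w_style=2.0):
    """Illustrative 'disentangled' classifier-free guidance: separate guidance
    scales steer the sample toward the pose condition and the appearance/style
    condition independently."""
    return (eps_uncond
            + w_pose * (eps_pose - eps_uncond)
            + w_style * (eps_pose_style - eps_pose))

# toy usage with random stand-ins for the denoiser's noise predictions
e0, e1, e2 = (torch.randn(1, 3, 64, 64) for _ in range(3))
eps = disentangled_cfg(e0, e1, e2)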
Wasim, Syed Talal; Naseer, Muzammal; Khan, Salman; Khan, Fahad; Shah, Mubarak
Vita-CLIP: Video and text adaptive CLIP via Multimodal Prompting Conference
IEEE Computer Vision and Pattern Recognition, 2023.
Abstract | Tags: CVPR | Links:
@conference{Wasim2023,
title = {Vita-CLIP: Video and text adaptive CLIP via Multimodal Prompting},
author = {Syed Talal Wasim and Muzammal Naseer and Salman Khan and Fahad Khan and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/vita_clip_video_and_text_adapt-Camera-ready-PDF.pdf
},
year = {2023},
date = {2023-06-18},
urldate = {2023-06-18},
publisher = {IEEE Computer Vision and Pattern Recognition},
abstract = {Adopting contrastive image-text pretrained models like CLIP towards video classification has gained attention due to its cost-effectiveness and competitive performance. However, recent works in this area face a trade-off. Finetuning the pretrained model to achieve strong supervised performance results in low zero-shot generalization. Similarly, freezing the backbone to retain zero-shot capability causes a significant drop in supervised accuracy. Because of this, recent works in the literature typically train separate models for supervised and zero-shot action recognition. In this work, we propose a multimodal prompt learning scheme that works to balance the supervised and zero-shot performance under a single unified training. Our prompting approach on the vision side caters for three aspects: 1) global video-level prompts to model the data distribution; 2) local frame-level prompts to provide per-frame discriminative conditioning; and 3) a summary prompt to extract a condensed video representation. Additionally, we define a prompting scheme on the text side to augment the textual context. Through this prompting scheme, we can achieve state-of-the-art zero-shot performance on Kinetics-600, HMDB51 and UCF101 while remaining competitive in the supervised setting. By keeping the pretrained backbone frozen, we optimize a much lower number of parameters and retain the existing general representation, which helps achieve the strong zero-shot performance. Our codes and models will be publicly released.},
keywords = {CVPR},
pubstate = {published},
tppubtype = {conference}
}
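The three-part vision-side prompting can be sketched as learnable tokens prepended to a frozen backbone's frame features. Everything below (shapes, counts, names) is an illustrative assumption, not the authors' architecture.

import torch
import torch.nn as nn

class VideoPromptWrapper(nn.Module):
    """Sketch of vision-side multimodal prompting: frame tokens from a frozen
    encoder are combined with (i) global video-level prompts, (ii) per-frame
    local prompts, and (iii) one summary token, then fed to a frozen
    transformer. Only these prompt parameters would be trained."""
    def __init__(self, dim, n_frames, n_global=8):
        super().__init__()
        self.global_prompts = nn.Parameter(torch.zeros(n_global, dim))
        self.local_prompts = nn.Parameter(torch.zeros(n_frames, dim))
        self.summary = nn.Parameter(torch.zeros(1, dim))
        nn.init.normal_(self.global_prompts, std=0.02)
        nn.init.normal_(self.local_prompts, std=0.02)

    def forward(self, frame_tokens):                 # (B, T, dim), frozen features
        B, T, D = frame_tokens.shape
        tokens = frame_tokens + self.local_prompts[:T].unsqueeze(0)
        g = self.global_prompts.unsqueeze(0).expand(B, -1, -1)
        s = self.summary.unsqueeze(0).expand(B, -1, -1)
        return torch.cat([s, g, tokens], dim=1)      # (B, 1 + n_global + T, dim)

x = VideoPromptWrapper(dim=512, n_frames=8)(torch.randn(2, 8, 512))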
Clark, Brandon Eric; Kerrigan, Alec; Kulkarni, Parth Parag; Cepeda, Vicente Vivanco; Shah, Mubarak
Where We Are and What We're Looking At: Query Based Worldwide Image Geo-localization Using Hierarchies and Scenes Conference
IEEE Computer Vision and Pattern Recognition, 2023.
Abstract | Tags: CVPR | Links:
@conference{Clark2023,
title = {Where We Are and What We're Looking At: Query Based Worldwide Image Geo-localization Using Hierarchies and Scenes},
author = {Brandon Eric Clark and Alec Kerrigan and Parth Parag Kulkarni and Vicente Vivanco Cepeda and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Camera-Ready-Full-Paper.pdf
https://github.com/AHKerrigan/GeoGuessNet
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/CVPR23-Poster_THU-PM-246-1.pdf
https://www.youtube.com/watch?v=fp3hZGbwPqk},
year = {2023},
date = {2023-06-18},
urldate = {2023-06-18},
publisher = {IEEE Computer Vision and Pattern Recognition},
abstract = {Determining the exact latitude and longitude at which a photo was taken is a useful and widely applicable task, yet it remains exceptionally difficult despite the accelerated progress of other computer vision tasks. Most previous approaches have opted to learn single representations of query images, which are then classified at different levels of geographic granularity. These approaches fail to exploit the different visual cues that give context to different hierarchies, such as the country, state, and city level. To this end, we introduce an end-to-end transformer-based architecture that exploits the relationship between different geographic levels (which we refer to as hierarchies) and the corresponding visual scene information in an image through hierarchical cross-attention. We achieve this by learning a query for each geographic hierarchy and scene type. Furthermore, we learn a separate representation for different environmental scenes, as different scenes in the same location are often defined by completely different visual features. We achieve state-of-the-art accuracy on 4 standard geo-localization datasets: Im2GPS, Im2GPS3k, YFCC4k, and YFCC26k, and qualitatively demonstrate how our method learns different representations for different visual hierarchies and scenes, which has not been demonstrated in previous methods. The above testing datasets mostly consist of iconic landmarks or images taken from social media, which makes them either a simple memory task or biased towards certain places. To address this issue we introduce a much harder testing dataset, Google-World-Streets-15k, comprised of images taken from Google Streetview covering the whole planet, and present state-of-the-art results. Our code can be found at https://github.com/AHKerrigan/GeoGuessNet.},
keywords = {CVPR},
pubstate = {published},
tppubtype = {conference}
}
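The core mechanism, one learned query per hierarchy (or scene type) cross-attending over image tokens, can be sketched compactly. The module below is purely illustrative and not the authors' architecture; dimensions and the number of hierarchies are assumptions.

import torch
import torch.nn as nn

class HierarchyQueries(nn.Module):
    """Sketch of query-based geo-localization: one learned query per
    geographic hierarchy (e.g., country/state/city) cross-attends over
    image tokens to build a hierarchy-specific representation, which a
    downstream head would classify at that granularity."""
    def __init__(self, dim=256, n_hierarchies=7, n_heads=8):
        super().__init__()
        self.queries = nn.Parameter(torch.randn(n_hierarchies, dim) * 0.02)
        self.attn = nn.MultiheadAttention(dim, n_heads, batch_first=True)

    def forward(self, image_tokens):                 # (B, N, dim)
        B = image_tokens.size(0)
        q = self.queries.unsqueeze(0).expand(B, -1, -1)
        out, _ = self.attn(q, image_tokens, image_tokens)
        return out                                    # (B, n_hierarchies, dim)

feats = HierarchyQueries()(torch.randn(2, 196, 256))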
Rana, Aayush; Rawat, Yogesh
Hybrid Active Learning via Deep Clustering for Video Action Detection Conference
IEEE Computer Vision and Pattern Recognition, 2023.
Tags: CVPR | Links:
@conference{Rana2023,
title = {Hybrid Active Learning via Deep Clustering for Video Action Detection},
author = {Aayush Rana and Yogesh Rawat},
url = {https://www.crcv.ucf.edu/research/projects/hybrid-active-learning-via-deep-clustering-for-video-action-detection/},
year = {2023},
date = {2023-06-18},
urldate = {2023-06-18},
publisher = {IEEE Computer Vision and Pattern Recognition},
keywords = {CVPR},
pubstate = {published},
tppubtype = {conference}
}
Chantry, Madeline; Biyani, Naman; Kamtam, Prudvi; Vyas, Shruti; Palangi, Hamid; Vineet, Vibhav; Rawat, Yogesh
A Large-scale Robustness Analysis of Video Action Recognition Models Conference
IEEE Computer Vision and Pattern Recognition, 2023.
Tags: CVPR | Links:
@conference{Chantry2023,
title = {A Large-scale Robustness Analysis of Video Action Recognition Models},
author = {Madeline Chantry and Naman Biyani and Prudvi Kamtam and Shruti Vyas and Hamid Palangi and Vibhav Vineet and Yogesh Rawat},
url = {https://sites.google.com/view/videorobustnessbenchmark/home
https://www.crcv.ucf.edu/research/projects/ucf101-ds-action-recognition-for-real-world-distribution-shifts/
https://github.com/Maddy12/ActionRecognitionRobustnessEval
https://youtu.be/pv2AJ_t-v90
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/PosterCVPR2023.png},
year = {2023},
date = {2023-06-18},
urldate = {2023-06-18},
publisher = {IEEE Computer Vision and Pattern Recognition},
keywords = {CVPR},
pubstate = {published},
tppubtype = {conference}
}
Zhu, Sijie; Lin, Zhe; Cohen, Scott; Kuen, Jason; Zhang, Zhifei; Chen, Chen
TopNet: Transformer-based Object Placement Network for Image Compositing Conference
IEEE Computer Vision and Pattern Recognition, 2023.
Tags: CVPR | Links:
@conference{Zhu2023b,
title = {TopNet: Transformer-based Object Placement Network for Image Compositing },
author = {Sijie Zhu and Zhe Lin and Scott Cohen and Jason Kuen and Zhifei Zhang and Chen Chen},
url = {https://arxiv.org/pdf/2304.03372.pdf},
year = {2023},
date = {2023-06-18},
urldate = {2023-06-18},
publisher = {IEEE Computer Vision and Pattern Recognition},
keywords = {CVPR},
pubstate = {published},
tppubtype = {conference}
}
Zheng, Ce; Mendieta, Matias; Yang, Taojiannan; Qi, Guo-Jun; Chen, Chen
FeatER: An Efficient Network for Human Reconstruction via Feature Map-Based TransformER Conference
IEEE Computer Vision and Pattern Recognition, 2023.
Tags: CVPR | Links:
@conference{Zheng2023,
title = {FeatER: An Efficient Network for Human Reconstruction via Feature Map-Based TransformER},
author = {Ce Zheng and Matias Mendieta and Taojiannan Yang and Guo-Jun Qi and Chen Chen},
url = {https://arxiv.org/pdf/2205.15448.pdf
https://zczcwh.github.io/feater_page/
https://github.com/zczcwh/FeatER},
year = {2023},
date = {2023-06-18},
urldate = {2023-06-18},
publisher = {IEEE Computer Vision and Pattern Recognition},
keywords = {CVPR},
pubstate = {published},
tppubtype = {conference}
}
Zheng, Ce; Liu, Xianpeng; Qi, Guo-Jun; Chen, Chen
POTTER: Pooling Attention Transformer for Efficient Human Mesh Recovery Conference
IEEE Computer Vision and Pattern Recognition, 2023.
Tags: CVPR | Links:
@conference{Zheng2023b,
title = {POTTER: Pooling Attention Transformer for Efficient Human Mesh Recovery},
author = {Ce Zheng and Xianpeng Liu and Guo-Jun Qi and Chen Chen},
url = {https://arxiv.org/pdf/2303.13357.pdf
https://zczcwh.github.io/potter_page/
https://github.com/zczcwh/POTTER},
year = {2023},
date = {2023-06-18},
urldate = {2023-06-18},
publisher = {IEEE Computer Vision and Pattern Recognition},
keywords = {CVPR},
pubstate = {published},
tppubtype = {conference}
}
Zhao, Qitao; Zheng, Ce; Liu, Mengyuan; Wang, Pichao; Chen, Chen
PoseFormerV2: Exploring Frequency Domain for Efficient and Robust 3D Human Pose Estimation Conference
IEEE Computer Vision and Pattern Recognition, 2023.
Tags: CVPR | Links:
@conference{nokey,
title = {PoseFormerV2: Exploring Frequency Domain for Efficient and Robust 3D Human Pose Estimation},
author = {Qitao Zhao and Ce Zheng and Mengyuan Liu and Pichao Wang and Chen Chen},
url = {https://arxiv.org/pdf/2303.17472.pdf
https://qitaozhao.github.io/PoseFormerV2
https://github.com/QitaoZhao/PoseFormerV2},
year = {2023},
date = {2023-06-18},
urldate = {2023-06-18},
publisher = {IEEE Computer Vision and Pattern Recognition},
keywords = {CVPR},
pubstate = {published},
tppubtype = {conference}
}
Zheng, Ce; Wu, Wenhan; Chen, Chen; Yang, Taojiannan; Zhu, Sijie; Shen, Ju; Kehtarnavaz, Nasser; Shah, Mubarak
Deep Learning-Based Human Pose Estimation: A Survey Journal Article
In: ACM Computing Surveys, 2023.
@article{Zheng2023c,
title = {Deep Learning-Based Human Pose Estimation: A Survey},
author = {Ce Zheng and Wenhan Wu and Chen Chen and Taojiannan Yang and Sijie Zhu and Ju Shen and Nasser Kehtarnavaz and Mubarak Shah},
editor = {Albert Y H Zomaya},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/3603618.pdf
https://github.com/zczcwh/DL-HPE},
doi = {10.1145/3603618},
year = {2023},
date = {2023-06-09},
journal = {ACM Computing Surveys},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Kini, Jyoti; Mian, Ajmal; Shah, Mubarak
3DMODT: Attention-Guided Affinities for Joint Detection & Tracking in 3D Point Clouds Conference
IEEE International Conference on Robotics and Automation, 2023.
Tags: ICRA | Links:
@conference{Kini2023,
title = {3DMODT: Attention-Guided Affinities for Joint Detection & Tracking in 3D Point Clouds},
author = {Jyoti Kini and Ajmal Mian and Mubarak Shah},
url = {https://arxiv.org/pdf/2211.00746.pdf},
year = {2023},
date = {2023-05-29},
urldate = {2023-05-29},
booktitle = {IEEE International Conference on Robotics and Automation},
keywords = {ICRA},
pubstate = {published},
tppubtype = {conference}
}
Sangam, Tushar; Dave, Ishan Rajendrakumar; Sultani, Waqas; Shah, Mubarak
TransVisDrone: Spatio-Temporal Transformer for Vision-based Drone-to-Drone Detection in Aerial Videos Conference
IEEE International Conference on Robotics and Automation, 2023.
Tags: ICRA | Links:
@conference{Sangam2023,
title = {TransVisDrone: Spatio-Temporal Transformer for Vision-based Drone-to-Drone Detection in Aerial Videos},
author = {Tushar Sangam and Ishan Rajendrakumar Dave and Waqas Sultani and Mubarak Shah},
url = {https://arxiv.org/pdf/2210.08423.pdf},
year = {2023},
date = {2023-05-29},
booktitle = {IEEE International Conference on Robotics and Automation},
keywords = {ICRA},
pubstate = {published},
tppubtype = {conference}
}
Yang, Taojiannan; Zhu, Yi; Xie, Yusheng; Zhang, Aston; Chen, Chen; Li, Mu
AIM: Adapting Image Models for Efficient Video Understanding Conference
Eleventh International Conference on Learning Representations (ICLR), 2023.
@conference{Yang2023,
title = {AIM: Adapting Image Models for Efficient Video Understanding},
author = {Taojiannan Yang and Yi Zhu and Yusheng Xie and Aston Zhang and Chen Chen and Mu Li},
year = {2023},
date = {2023-05-01},
urldate = {2023-05-01},
publisher = {Eleventh International Conference on Learning Representations (ICLR)},
abstract = {Recent vision transformer based video models mostly follow the "image pre-training then finetuning" paradigm and have achieved great success on multiple video benchmarks. However, full finetuning such a video model could be computationally expensive and unnecessary, given the pre-trained image transformer models have demonstrated exceptional transferability. In this work, we propose a novel method to Adapt pre-trained Image Models (AIM) for efficient video understanding. By freezing the pre-trained image model and adding a few lightweight Adapters, we introduce spatial adaptation, temporal adaptation and joint adaptation to gradually equip an image model with spatiotemporal reasoning capability. We show that our proposed AIM can achieve competitive or even better performance than prior arts with substantially fewer tunable parameters on four video action recognition benchmarks. Thanks to its simplicity, our method is also generally applicable to different image pre-trained models, which has the potential to leverage more powerful image foundation models in the future.},
keywords = {ICLR},
pubstate = {published},
tppubtype = {conference}
}
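The adapter described here is the standard bottleneck design: down-project, nonlinearity, up-project, residual. A minimal PyTorch sketch (dimensions illustrative; the paper inserts such modules at several points in each frozen block for spatial, temporal, and joint adaptation):

import torch
import torch.nn as nn

class Adapter(nn.Module):
    """Bottleneck adapter: only these parameters are trained while the
    pre-trained image transformer stays frozen. Zero-initializing the
    up-projection makes the module start as an identity mapping."""
    def __init__(self, dim=768, bottleneck=64):
        super().__init__()
        self.down = nn.Linear(dim, bottleneck)
        self.up = nn.Linear(bottleneck, dim)
        self.act = nn.GELU()
        nn.init.zeros_(self.up.weight)
        nn.init.zeros_(self.up.bias)

    def forward(self, x):
        return x + self.up(self.act(self.down(x)))

tokens = Adapter()(torch.randn(2, 197, 768))   # e.g., ViT-B tokens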
Beetham, James; Kardan, Navid; Mian, Ajmal; Shah, Mubarak
Dual Student Networks for Data-Free Model Stealing Conference
Eleventh International Conference on Learning Representations (ICLR), 2023.
@conference{Beetham2023,
title = {Dual Student Networks for Data-Free Model Stealing},
author = {James Beetham and Navid Kardan and Ajmal Mian and Mubarak Shah},
year = {2023},
date = {2023-05-01},
urldate = {2023-05-01},
publisher = {Eleventh International Conference on Learning Representations (ICLR)},
abstract = {Data-free model stealing aims to replicate a target model without direct access to either the training data or the target model. To accomplish this, existing methods use a generator to produce samples in order to train a student model to match the target model outputs. To this end, the two main challenges are estimating gradients of the target model without access to its parameters, and generating a diverse set of images that thoroughly explores the input space. We propose a Dual Student method where two students are symmetrically trained in order to provide the generator a criterion to generate samples that the two students disagree on. On one hand, disagreement on a sample implies at least one student has classified the sample incorrectly when compared with the target model. This push towards disagreeing samples implicitly encourages exploring a more diverse region of input space. On the other hand, our method utilizes gradients of student models to indirectly estimate gradients of the target model. We show that this novel training objective for the generator network is equivalent to optimizing a lower bound on the generator’s loss if we had access to the target model gradients. In other words, our method alters the standard data-free model stealing paradigm by substituting the target model with a separate student model, thereby creating a lower bound which can be directly optimized without additional target model queries or separate synthetic datasets. We show that our new optimization framework provides more accurate gradient estimation of the target model and better accuracies on benchmark classification datasets. Additionally, our approach balances improved query efficiency with training computation cost. Finally, we demonstrate that our method serves as a better proxy model for transfer-based adversarial attacks than existing data-free model stealing methods.},
keywords = {ICLR},
pubstate = {published},
tppubtype = {conference}
}
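The generator's criterion, producing samples the two students disagree on, can be sketched in a few lines. The L1 distance over softmax outputs below is one plausible instantiation for illustration; the paper's exact disagreement measure may differ.

import torch
import torch.nn.functional as F

def disagreement(logits_s1, logits_s2):
    """Distance between the two students' predictions on generated samples.
    The generator ascends this (students disagree); the students descend it
    while also matching the target model's outputs."""
    p1, p2 = logits_s1.softmax(-1), logits_s2.softmax(-1)
    return (p1 - p2).abs().sum(-1).mean()

# toy usage: generator loss is the negative disagreement
g_loss = -disagreement(torch.randn(8, 10), torch.randn(8, 10))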
Yang, Peiyu; Akhtar, Naveed; Wen, Zeyi; Shah, Mubarak; Mian, Ajmal
Re-calibrating Feature Attributions for Model Interpretation Conference
Eleventh International Conference on Learning Representations (ICLR), notable top 25%, 2023.
Tags: ICLR
@conference{nokey,
title = {Re-calibrating Feature Attributions for Model Interpretation},
author = {Peiyu Yang and Naveed Akhtar and Zeyi Wen and Mubarak Shah and Ajmal Mian},
year = {2023},
date = {2023-05-01},
urldate = {2023-05-01},
publisher = {Eleventh International Conference on Learning Representations (ICLR), notable top 25%},
keywords = {ICLR},
pubstate = {published},
tppubtype = {conference}
}
Barbalau, Antonio; Ionescu, Radu Tudor; Georgescu, Mariana-Iuliana; Dueholm, Jacob; Ramachandra, Bharathkumar; Nasrollahi, Kamal; Khan, Fahad Shahbaz; Moeslund, Thomas B.; Shah, Mubarak
SSMTL++: Revisiting Self-Supervised Multi-Task Learning for Video Anomaly Detection Journal Article
In: Computer Vision and Image Understanding, 2023.
Tags: CVIU | Links:
@article{Barbalau2023,
title = {SSMTL++: Revisiting Self-Supervised Multi-Task Learning for Video Anomaly Detection},
author = {Antonio Barbalau and Radu Tudor Ionescu and Mariana-Iuliana Georgescu and Jacob Dueholm and Bharathkumar Ramachandra and Kamal Nasrollahi and Fahad Shahbaz Khan and Thomas B. Moeslund and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/SSMTL.pdf},
year = {2023},
date = {2023-02-11},
urldate = {2023-02-11},
journal = {Computer Vision and Image Understanding},
keywords = {CVIU},
pubstate = {published},
tppubtype = {article}
}
Vahidian, Saeed; Morafah, Mahdi; Wang, Weijia; Kungurtsev, Vyacheslav; Chen, Chen; Shah, Mubarak; Lin, Bill
Efficient Distribution Similarity Identification in Clustered Federated Learning via Principal Angles Between Client Data Subspaces Conference
37th AAAI Conference on Artificial Intelligence, 2023.
Tags: AAAI | Links:
@conference{Vahidian2023,
title = {Efficient Distribution Similarity Identification in Clustered Federated Learning via Principal Angles Between Client Data Subspaces},
author = {Saeed Vahidian and Mahdi Morafah and Weijia Wang and Vyacheslav Kungurtsev and Chen Chen and Mubarak Shah and Bill Lin},
url = {https://arxiv.org/abs/2209.10526},
year = {2023},
date = {2023-02-07},
urldate = {2023-02-07},
publisher = {37th AAAI Conference on Artificial Intelligence},
keywords = {AAAI},
pubstate = {published},
tppubtype = {conference}
}
Zhong, Xian; Li, Zipeng; Chen, Shuqin; Jiang, Kui; Chen, Chen; Ye, Mang
Refined Semantic Enhancement Towards Frequency Diffusion for Video Captioning Conference
37th AAAI Conference on Artificial Intelligence, 2023.
Tags: AAAI | Links:
@conference{Zhong2023,
title = {Refined Semantic Enhancement Towards Frequency Diffusion for Video Captioning},
author = {Xian Zhong and Zipeng Li and Shuqin Chen and Kui Jiang and Chen Chen and Mang Ye},
url = {https://arxiv.org/abs/2211.15076},
year = {2023},
date = {2023-02-07},
publisher = {37th AAAI Conference on Artificial Intelligence},
keywords = {AAAI},
pubstate = {published},
tppubtype = {conference}
}
Liu, Mengyuan; Meng, Fanyang; Chen, Chen; Wu, Songtao
Novel Motion Patterns Matter for Practical Skeleton-based Action Recognition Conference
37th AAAI Conference on Artificial Intelligence, 2023.
Tags: AAAI
@conference{Liu2023,
title = {Novel Motion Patterns Matter for Practical Skeleton-based Action Recognition},
author = {Mengyuan Liu and Fanyang Meng and Chen Chen and Songtao Wu},
year = {2023},
date = {2023-02-07},
publisher = {37th AAAI Conference on Artificial Intelligence},
keywords = {AAAI},
pubstate = {published},
tppubtype = {conference}
}
Gupta, Rohit; Akhtar, Naveed; Mian, Ajmal; Shah, Mubarak
Contrastive Self-Supervised Learning Leads to Higher Adversarial Susceptibility Conference
37th AAAI Conference on Artificial Intelligence, 2023.
Tags: AAAI | Links:
@conference{Gupta2023,
title = {Contrastive Self-Supervised Learning Leads to Higher Adversarial Susceptibility},
author = {Rohit Gupta and Naveed Akhtar and Ajmal Mian and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/2207.10862.pdf},
year = {2023},
date = {2023-02-07},
publisher = {37th AAAI Conference on Artificial Intelligence},
keywords = {AAAI},
pubstate = {published},
tppubtype = {conference}
}
2022
Rana, Aayush; Rawat, Yogesh
Are all Frames Equal? Active Sparse Labeling for Video Action Detection Conference
36th Conference on Neural Information Processing Systems (NeurIPS 2022), 2022.
Abstract | Tags: NeurIPS | Links:
@conference{nokey,
title = {Are all Frames Equal? Active Sparse Labeling for Video Action Detection },
author = {Aayush Rana and Yogesh Rawat},
url = {https://www.crcv.ucf.edu/research/projects/active-sparse-labeling-for-video-action-detection/
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/neurips_poster_ASL_upload.png
https://github.com/aayushjr/ASL-video },
year = {2022},
date = {2022-11-28},
urldate = {2022-11-28},
publisher = {36th Conference on Neural Information Processing Systems (NeurIPS 2022)},
abstract = {Video action detection requires annotations at every frame, which drastically increases the labeling cost. In this work, we focus on efficient labeling of videos for action detection to minimize this cost. We propose active sparse labeling (ASL), a novel active learning strategy for video action detection. We propose a novel frame-level scoring mechanism aimed at selecting the most informative frames in a video. We also introduce a novel loss formulation which enables training of an action detection model with these sparsely selected frames. We evaluated the proposed approach on two different action detection benchmark datasets, UCF-101-24 and J-HMDB-21, and observed that active sparse labeling can be very effective in saving annotation costs. We demonstrate that the proposed approach performs better than random selection, outperforming all other baselines, with performance comparable to a supervised approach using merely 10% of the annotations.},
keywords = {NeurIPS},
pubstate = {published},
tppubtype = {conference}
}
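The selection loop, scoring frames and labeling only the most informative ones, is easy to illustrate. The entropy-based score below is a simple stand-in for the paper's scoring mechanism; names and shapes are assumptions.

import torch

def select_frames(frame_logits, k):
    """Rank the frames of one video by prediction entropy (a proxy for
    informativeness) and return the k most uncertain frame indices to send
    for annotation."""
    p = frame_logits.softmax(-1)                              # (T, C)
    entropy = -(p * p.clamp_min(1e-8).log()).sum(-1)          # (T,)
    return entropy.topk(k).indices

# toy usage: annotate ~10% of a 120-frame video
frames_to_label = select_frames(torch.randn(120, 24), k=12)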
Schiappa, Madeline Chantry; Vyas, Shruti; Palangi, Hamid; Rawat, Yogesh; Vineet, Vibhav
Robustness Analysis of Video-Language Models Against Visual and Language Perturbations Conference
36th Conference on Neural Information Processing Systems (NeurIPS 2022), 2022.
Abstract | Tags: NeurIPS | Links:
@conference{Schiappa2022,
title = {Robustness Analysis of Video-Language Models Against Visual and Language Perturbations},
author = {Madeline Chantry Schiappa and Shruti Vyas and Hamid Palangi and Yogesh Rawat and Vibhav Vineet},
url = {https://sites.google.com/view/videolanguagerobustness/home
https://openreview.net/forum?id=A79jAS4MeW9
https://github.com/Maddy12/VideoLanguageModelRobustness/tree/master},
year = {2022},
date = {2022-11-28},
publisher = {36th Conference on Neural Information Processing Systems (NeurIPS 2022)},
abstract = {Joint visual and language modeling on large-scale datasets has recently shown good progress in multi-modal tasks when compared to single modal learning. However, robustness of these approaches against real-world perturbations has not been studied. In this work, we perform the first extensive robustness study of video-language models against various real-world perturbations. We focus on text-to-video retrieval and propose two large-scale benchmark datasets, MSRVTT-P and YouCook2-P, which utilize 90 different visual and 35 different text perturbations. The study reveals some interesting initial findings from the studied models: 1) models are more robust when text is perturbed versus when video is perturbed, 2) models that are pre-trained are more robust than those trained from scratch, 3) models attend more to scene and objects rather than motion and action. We hope this study will serve as a benchmark and guide future research in robust video-language learning. The benchmark introduced in this study along with the code and datasets is available at https://bit.ly/3CNOly4.},
keywords = {NeurIPS},
pubstate = {published},
tppubtype = {conference}
}
Xu, Ziwei; Rawat, Yogesh; Wong, Yongkang; Kankanhalli, Mohan; Shah, Mubarak
Don’t Pour Cereal into Coffee: Differentiable Temporal Logic for Temporal Action Segmentation Conference
36th Conference on Neural Information Processing Systems (NeurIPS 2022), 2022.
Abstract | Tags: NeurIPS | Links:
@conference{Xu2022,
title = {Don’t Pour Cereal into Coffee: Differentiable Temporal Logic for Temporal Action Segmentation},
author = {Ziwei Xu and Yogesh Rawat and Yongkang Wong and Mohan Kankanhalli and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/ziwei_neurips2022.pdf
https://diff-tl.github.io/
https://github.com/ZiweiXU/DTL-action-segmentation},
year = {2022},
date = {2022-11-09},
urldate = {2022-11-09},
publisher = {36th Conference on Neural Information Processing Systems (NeurIPS 2022)},
abstract = {We propose Differentiable Temporal Logic (DTL), a model-agnostic framework that introduces temporal constraints to deep networks. DTL treats the outputs of a network as a truth assignment of a temporal logic formula, and computes a temporal logic loss reflecting the consistency between the output and the constraints. We propose a comprehensive set of constraints, which are implicit in data annotations, and incorporate them with deep networks via DTL. We evaluate the effectiveness of DTL on the temporal action segmentation task and observe improved performance and reduced logical errors in the output of different task models. Furthermore, we provide an extensive analysis to visualize the desirable effects of DTL.},
keywords = {NeurIPS},
pubstate = {published},
tppubtype = {conference}
}
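A temporal logic loss of this kind can be made concrete with one constraint. The sketch below is one possible soft relaxation of "action A must precede action B" (our own illustrative choice, not the paper's full constraint set): the running maximum of p_A approximates "A has occurred by time t", and mass of B before that point is penalized.

import torch

def precedence_loss(p_a, p_b):
    """Soft penalty for violating 'A precedes B'. p_a, p_b: per-frame
    probabilities of actions A and B over a sequence of length T."""
    a_so_far = torch.cummax(p_a, dim=-1).values   # soft indicator: A happened by t
    return (p_b * (1.0 - a_so_far)).mean()

T = 50
loss = precedence_loss(torch.rand(T), torch.rand(T))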
Vyas, Shruti; Chen, Chen; Shah, Mubarak
GAMa: Cross-view Video Geo-localization Conference
European Conference on Computer Vision, 2022.
Abstract | Tags: ECCV | Links:
@conference{Vyas2022,
title = {GAMa: Cross-view Video Geo-localization},
author = {Shruti Vyas and Chen Chen and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/1512.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/1512-supp.pdf
https://youtu.be/KSHuer_VXJo},
year = {2022},
date = {2022-10-23},
urldate = {2022-10-23},
booktitle = {European Conference on Computer Vision},
abstract = {The existing work in cross-view geo-localization is based on images where a ground panorama is matched to an aerial image. In this work, we focus on ground videos instead of images, which provide additional contextual cues that are important for this task. There are no existing datasets for this problem; therefore, we propose the GAMa dataset, a large-scale dataset with ground videos and corresponding aerial images. We also propose a novel approach to solve this problem. At clip-level, a short video clip is matched with the corresponding aerial image and is later used to get video-level geo-localization of a long video. Moreover, we propose a hierarchical approach to further improve the clip-level geo-localization. On this challenging dataset, with unaligned images and limited field of view, our proposed method achieves a Top-1 recall rate of 19.4% and 45.1% @1.0mile. Code & dataset are available at this link.},
keywords = {ECCV},
pubstate = {published},
tppubtype = {conference}
}
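The reported numbers are standard retrieval recall. For readers unfamiliar with the metric, a minimal sketch of top-k recall for cross-view matching (the "@1.0mile" variant additionally accepts any gallery image within that distance of the ground truth; not shown here):

import torch

def recall_at_k(sim, k=1):
    """sim[i, j] scores ground clip i against aerial image j; the true
    match is on the diagonal. Returns the fraction of queries whose true
    match ranks in the top k."""
    ranks = sim.argsort(dim=1, descending=True)[:, :k]
    hits = (ranks == torch.arange(sim.size(0)).unsqueeze(1)).any(1)
    return hits.float().mean().item()

r1 = recall_at_k(torch.randn(100, 100), k=1)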
Wang, Wenxuan; Chen, Chen; Wang, Jing; Zha, Sen; Zhang, Yan; Li, Jiangyun
Med-DANet: Dynamic Architecture Network for Efficient Medical Volumetric Segmentation Conference
European Conference on Computer Vision, 2022.
Abstract | Tags: ECCV | Links:
@conference{Wang2022,
title = {Med-DANet: Dynamic Architecture Network for Efficient Medical Volumetric Segmentation},
author = {Wenxuan Wang and Chen Chen and Jing Wang and Sen Zha and Yan Zhang and Jiangyun Li},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/2206.06575.pdf},
year = {2022},
date = {2022-10-23},
urldate = {2022-10-23},
booktitle = {European Conference on Computer Vision},
abstract = {For 3D medical image (e.g. CT and MRI) segmentation, the difficulty of segmenting each slice in a clinical case varies greatly. Previous research on volumetric medical image segmentation in a slice-by-slice manner conventionally uses the identical 2D deep neural network to segment all the slices of the same case, ignoring the data heterogeneity among image slices. In this paper, we focus on multi-modal 3D MRI brain tumor segmentation and propose a dynamic architecture network named Med-DANet based on adaptive model selection to achieve an effective accuracy and efficiency trade-off. For each slice of the input 3D MRI volume, our proposed method learns a slice-specific decision by the Decision Network to dynamically select a suitable model from the predefined Model Bank for the subsequent 2D segmentation task. Extensive experimental results on both BraTS 2019 and 2020 datasets show that our proposed method achieves comparable or better results than previous state-of-the-art methods for 3D MRI brain tumor segmentation with much less model complexity. Compared with the state-of-the-art 3D method TransBTS, the proposed framework improves the model efficiency by up to 3.5× without sacrificing the accuracy. Our code will be publicly available at https://github.com/Wenxuan-1119/Med-DANet.},
keywords = {ECCV},
pubstate = {published},
tppubtype = {conference}
}
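The decision-network-plus-model-bank idea can be sketched as a per-slice router. The module below is an illustrative toy (tiny models, Gumbel-softmax routing to keep the choice differentiable during training); it runs every bank member for simplicity, whereas the real method would execute only the selected model at inference.

import torch
import torch.nn as nn

class SliceRouter(nn.Module):
    """Sketch of slice-wise dynamic model selection: a small decision net
    scores each 2D slice and routes it to one member of a model bank."""
    def __init__(self, bank, in_ch=4):
        super().__init__()
        self.bank = nn.ModuleList(bank)
        self.decide = nn.Sequential(
            nn.AdaptiveAvgPool2d(1), nn.Flatten(), nn.Linear(in_ch, len(bank)))

    def forward(self, slice_2d):                      # (B, C, H, W)
        w = torch.nn.functional.gumbel_softmax(self.decide(slice_2d), hard=True)
        outs = torch.stack([m(slice_2d) for m in self.bank], dim=1)
        return (w[:, :, None, None, None] * outs).sum(1)

# toy bank: two "segmenters" of different capacity
bank = [nn.Conv2d(4, 3, 1), nn.Conv2d(4, 3, 3, padding=1)]
seg = SliceRouter(bank)(torch.randn(2, 4, 64, 64))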
Zhu, Sijie; Lin, Zhe; Cohen, Scott; Kuen, Jason; Zhang, Zhifei; Chen, Chen
GALA: Toward Geometry-and-Lighting-Aware Object Search for Compositing Conference
European Conference on Computer Vision, 2022.
Abstract | Tags: ECCV | Links:
@conference{Zhu2022,
title = {GALA: Toward Geometry-and-Lighting-Aware Object Search for Compositing},
author = {Sijie Zhu and Zhe Lin and Scott Cohen and Jason Kuen and Zhifei Zhang and Chen Chen},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/2204.00125.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/GALA_supplementary.pdf},
year = {2022},
date = {2022-10-23},
urldate = {2022-10-23},
booktitle = {European Conference on Computer Vision},
abstract = {Compositing-aware object search aims to find the most compatible objects for compositing given a background image and a query bounding box. Previous works focus on learning compatibility between the foreground object and background, but fail to learn other important factors from large-scale data, i.e. geometry and lighting. To move a step further, this paper proposes GALA (Geometry-and-Lighting-Aware), a generic foreground object search method with discriminative modeling on geometry and lighting compatibility for open-world image compositing. Remarkably, it achieves state-of-the-art results on the CAIS dataset and generalizes well on large-scale open-world datasets, i.e. Pixabay and Open Images. In addition, our method can effectively handle non-box scenarios, where users only provide background images without any input bounding box. A web demo (see supplementary materials) is built to showcase applications of the proposed method for compositing-aware search and automatic location/scale prediction for the foreground object.},
keywords = {ECCV},
pubstate = {published},
tppubtype = {conference}
}
Khan, Aisha Urooj; Kuehne, Hilde; Gan, Chuang; Lobo, Niels Da Vitoria; Shah, Mubarak
Weakly Supervised Grounding for VQA in Vision-Language Transformers Conference
European Conference on Computer Vision, 2022.
Abstract | Tags: ECCV | Links:
@conference{Khan2022,
title = {Weakly Supervised Grounding for VQA in Vision-Language Transformers},
author = {Aisha Urooj Khan and Hilde Kuehne and Chuang Gan and Niels Da Vitoria Lobo and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/1011.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/1011-supp.pdf
https://github.com/aurooj/WSG-VQA-VLTransformers
https://youtu.be/dekmVb6lq3I},
year = {2022},
date = {2022-10-23},
urldate = {2022-10-23},
booktitle = {European Conference on Computer Vision},
abstract = {Transformers for visual-language representation learning have been getting a lot of interest and shown tremendous performance on visual question answering (VQA) and grounding. However, most systems that show good performance on those tasks still rely on pre-trained object detectors during training, which limits their applicability to the object classes available for those detectors. To mitigate this limitation, this paper focuses on the problem of weakly supervised grounding in the context of visual question answering in transformers. Our approach leverages capsules by transforming each visual token into a capsule representation in the visual encoder; it then uses activations from language self-attention layers as a text-guided selection module to mask those capsules before they are forwarded to the next layer. We evaluate our approach on the challenging GQA as well as the VQA-HAT dataset for VQA grounding. Our experiments show that, while removing the information of masked objects from standard transformer architectures leads to a significant drop in performance, the integration of capsules significantly improves the grounding ability of such systems and provides new state-of-the-art results compared to other approaches in the field.},
keywords = {ECCV},
pubstate = {published},
tppubtype = {conference}
}
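The text-guided selection step can be sketched as masking the least language-relevant capsule tokens. Everything below (the top-k keep rule, shapes, names) is an illustrative assumption rather than the paper's exact module.

import torch

def text_guided_capsule_mask(capsules, text_attn, keep_ratio=0.5):
    """Keep only the capsule tokens that language self-attention attends to
    most, zeroing out the rest before the next layer.
    capsules: (B, N, D); text_attn: (B, N) relevance scores."""
    k = int(capsules.size(1) * keep_ratio)
    idx = text_attn.topk(k, dim=1).indices                    # most relevant tokens
    mask = torch.zeros_like(text_attn).scatter(1, idx, 1.0)   # (B, N) keep-mask
    return capsules * mask.unsqueeze(-1)

out = text_guided_capsule_mask(torch.randn(2, 196, 256), torch.rand(2, 196))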
Rizve, Mamshad Nayeem; Kardan, Navid; Khan, Salman; Khan, Fahad Shahbaz; Shah, Mubarak
OpenLDN: Learning to Discover Novel Classes for Open-World Semi-Supervised Learning Conference
European Conference on Computer Vision, 2022.
Abstract | Tags: ECCV | Links:
@conference{Rizve2022,
title = {OpenLDN: Learning to Discover Novel Classes for Open-World Semi-Supervised Learning},
author = {Mamshad Nayeem Rizve and Navid Kardan and Salman Khan and Fahad Shahbaz Khan and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/6665.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/6665-supp.pdf
https://github.com/nayeemrizve/OpenLDN
https://youtu.be/p2lYqvklcjA},
year = {2022},
date = {2022-10-23},
urldate = {2022-10-23},
booktitle = {European Conference on Computer Vision},
abstract = {Semi-supervised learning (SSL) is one of the dominant approaches to address the annotation bottleneck of supervised learning. Recent SSL methods can effectively leverage a large repository of unlabeled data to improve performance while relying on a small set of labeled data. One common assumption in most SSL methods is that the labeled and unlabeled data are from the same data distribution. However, this is hardly the case in many real-world scenarios, which limits their applicability. In this work, instead, we attempt to solve the challenging open-world SSL problem that does not make such an assumption. In the open-world SSL problem, the objective is to recognize samples of known classes, and simultaneously detect and cluster samples belonging to novel classes present in unlabeled data. This work introduces OpenLDN that utilizes a pairwise similarity loss to discover novel classes. Using a bi-level optimization rule this pairwise similarity loss exploits the information available in the labeled set to implicitly cluster novel class samples, while simultaneously recognizing samples from known classes. After discovering novel classes, OpenLDN transforms the open-world SSL problem into a standard SSL problem to achieve additional performance gains using existing SSL methods. Our extensive experiments demonstrate that OpenLDN outperforms the current state-of-the-art methods on multiple popular classification benchmarks while providing a better accuracy/training time trade-off. Code: https://github.com/nayeemrizve/OpenLDN},
keywords = {ECCV},
pubstate = {published},
tppubtype = {conference}
}
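The pairwise similarity loss at the heart of the novel-class discovery step can be sketched simply: predicted class distributions of a pair should agree when their features are similar. The instantiation below (cosine feature similarity as the target, BCE over all batch pairs) is our own simplification and omits the paper's bi-level optimization.

import torch
import torch.nn.functional as F

def pairwise_similarity_loss(feats, probs):
    """Encourage prediction agreement (probs @ probs.T, in [0, 1]) to match
    feature similarity for every pair in the batch, implicitly clustering
    novel-class samples."""
    f = F.normalize(feats, dim=-1)
    feat_sim = (f @ f.t()).clamp(0, 1)                        # (B, B) targets
    pred_sim = (probs @ probs.t()).clamp(1e-6, 1 - 1e-6)      # prediction agreement
    return F.binary_cross_entropy(pred_sim, feat_sim)

probs = torch.randn(16, 20).softmax(-1)
loss = pairwise_similarity_loss(torch.randn(16, 128), probs)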
Rizve, Mamshad Nayeem; Kardan, Navid; Shah, Mubarak
Towards Realistic Semi-Supervised Learning Conference
European Conference on Computer Vision, 2022.
Abstract | Tags: ECCV | Links:
@conference{Rizve2022b,
title = {Towards Realistic Semi-Supervised Learning},
author = {Mamshad Nayeem Rizve and Navid Kardan and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/7402.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/7402-supp.pdf
https://github.com/nayeemrizve/TRSSL
https://youtu.be/mE7GeQ35WyY},
year = {2022},
date = {2022-10-23},
urldate = {2022-10-23},
booktitle = {European Conference on Computer Vision},
abstract = {Deep learning is pushing the state-of-the-art in many computer vision applications. However, it relies on large annotated data repositories, and capturing the unconstrained nature of real-world data is yet to be solved. Semi-supervised learning (SSL) complements the annotated training data with a large corpus of unlabeled data to reduce annotation cost. The standard SSL approach assumes unlabeled data are from the same distribution as annotated data. Recently, a more realistic SSL problem, called open-world SSL, is introduced, where the unannotated data might contain samples from unknown classes. In this paper, we propose a novel pseudo-label based approach to tackle SSL in the open-world setting. At the core of our method, we utilize sample uncertainty and incorporate prior knowledge about class distribution to generate reliable class-distribution-aware pseudo-labels for unlabeled data belonging to both known and unknown classes. Our extensive experimentation showcases the effectiveness of our approach on several benchmark datasets, where it substantially outperforms the existing state-of-the-art on seven diverse datasets including CIFAR-100 (∼17%), ImageNet-100 (∼5%), and Tiny ImageNet (∼9%). We also highlight the flexibility of our approach in solving the novel class discovery task, demonstrate its stability in dealing with imbalanced data, and complement our approach with a technique to estimate the number of novel classes. Code: https://github.com/nayeemrizve/TRSSL},
keywords = {ECCV},
pubstate = {published},
tppubtype = {conference}
}
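The class-distribution-aware pseudo-labeling can be sketched as biasing predictions by a class prior before thresholding. This is a simplification for illustration (the paper additionally uses sample uncertainty); the prior, threshold, and names are assumptions.

import torch

def distribution_aware_pseudo_labels(logits, class_prior, tau=0.7):
    """Re-weight predictions by a known or estimated class prior so that
    pseudo-labels follow the expected class distribution, then keep only
    the confident ones."""
    probs = logits.softmax(-1) * class_prior          # bias toward the prior
    probs = probs / probs.sum(-1, keepdim=True)       # renormalize per sample
    conf, labels = probs.max(-1)
    keep = conf > tau                                 # confidence threshold
    return labels[keep], keep

prior = torch.full((100,), 1.0 / 100)                 # e.g., uniform over CIFAR-100
labels, mask = distribution_aware_pseudo_labels(torch.randn(256, 100), prior)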
Kumar, Aakash; Kini, Jyoti; Mian, Ajmal; Shah, Mubarak
Self Supervised Learning for Multiple Object Tracking in 3D Point Clouds Conference
2022 IEEE/RSJ International Conference on Intelligent Robots and Systems, 2022.
Abstract | Tags: IROS | Links:
@conference{Kumar2022,
title = {Self Supervised Learning for Multiple Object Tracking in 3D Point Clouds},
author = {Aakash Kumar and Jyoti Kini and Ajmal Mian and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/camera_ready_paper.pdf},
year = {2022},
date = {2022-10-23},
urldate = {2022-10-23},
booktitle = {2022 IEEE/RSJ International Conference on Intelligent Robots and Systems},
abstract = {Multiple object tracking in 3D point clouds has applications in mobile robots and autonomous driving. This is a challenging problem due to the sparse nature of the point clouds and the added difficulty of annotation in 3D for supervised learning. To overcome these challenges, we propose a neural network architecture that learns effective object features and their affinities in a self supervised fashion for multiple object tracking in 3D point clouds captured with LiDAR sensors. For self supervision, we use two approaches. First, we generate two augmented LiDAR frames from a single real frame by applying translation, rotation and cutout to the objects. Second, we synthesize a LiDAR frame using CAD models or primitive geometric shapes and then apply the above three augmentations to them. Hence, the ground truth object locations and associations are known in both frames for self supervision. This removes the need to annotate object associations in real data, and additionally the need for training data collection and annotation for object detection in synthetic data. To the best of our knowledge, this is the first self supervised multiple object tracking method for 3D data. Our model achieves state of the art results.},
keywords = {IROS},
pubstate = {published},
tppubtype = {conference}
}
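The pair-generation step, applying per-object translation, rotation, and cutout to one LiDAR frame so that ground-truth associations between the two augmented copies are known for free, can be sketched as follows. Parameter ranges and names are illustrative assumptions.

import numpy as np

def augment_frame(points, labels, max_shift=0.5, max_yaw=np.pi / 8, cutout=0.1):
    """Per-object augmentation of one LiDAR frame.
    points: (N, 3) xyz; labels: (N,) object ids (-1 = background).
    Returns augmented points and their labels; running this twice on the
    same frame yields a training pair with known associations."""
    out = points.copy()
    keep = np.ones(len(points), dtype=bool)
    for obj in np.unique(labels[labels >= 0]):
        m = labels == obj
        c = out[m].mean(0)
        yaw = np.random.uniform(-max_yaw, max_yaw)
        R = np.array([[np.cos(yaw), -np.sin(yaw), 0],
                      [np.sin(yaw),  np.cos(yaw), 0],
                      [0, 0, 1]])
        out[m] = (out[m] - c) @ R.T + c + np.random.uniform(-max_shift, max_shift, 3)
        keep &= ~(m & (np.random.rand(len(points)) < cutout))   # per-point cutout
    return out[keep], labels[keep]

pts2, ids2 = augment_frame(np.random.randn(2048, 3), np.random.randint(-1, 5, 2048))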
Arif, Maliha; Yong, Calvin; Mahalanobis, Abhijit; Rahnavard, Nazanin
Background-Tolerant Object Classification with Embedded Segmentation Mask for Infrared and Color Imagery Conference
IEEE International Conference on Image Processing, 2022.
Abstract | Tags: ICIP | Links:
@conference{Arif2022,
title = {Background-Tolerant Object Classification with Embedded Segmentation Mask for Infrared and Color Imagery},
author = {Maliha Arif and Calvin Yong and Abhijit Mahalanobis and Nazanin Rahnavard},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Final_ICIP2022_MA_submission.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/bg_poster_ICIP2022_Single.jpg},
year = {2022},
date = {2022-10-16},
urldate = {2022-10-16},
booktitle = {IEEE International Conference on Image Processing},
abstract = {Even though convolutional neural networks (CNNs) can classify objects in images very accurately, it is well known that the attention of the network may not always be on the semantically important regions of the scene. It has been observed that networks often learn background textures, which are not relevant to the object of interest. In turn, this makes the networks susceptible to variations and changes in the background, which may negatively affect their performance. We propose a new three-step training procedure, called split training, to reduce this bias in CNNs for object recognition using infrared imagery and color (RGB) data. First, a baseline model is trained to recognize objects in images without background, and the activations produced by the higher layers are observed. Next, a second network is trained using a Mean Square Error (MSE) loss to produce the same activations, but in response to the objects embedded in background. This forces the second network to ignore the background while focusing on the object of interest. Finally, with the layers producing the activations frozen, the rest of the second network is trained using cross-entropy loss to classify the objects in images with background. Our training method outperforms the traditional training procedure, both for a simple CNN architecture and for deep CNNs like VGG and DenseNet, achieving higher accuracy and learning to mimic human vision, which focuses more on shape and structure than on background.},
keywords = {ICIP},
pubstate = {published},
tppubtype = {conference}
}
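Step 2 of split training, matching the baseline's high-layer activations with an MSE loss, is the distinctive piece and is easy to sketch. The toy models and function names below are illustrative stand-ins for the networks in the paper.

import torch
import torch.nn.functional as F

def step2_activation_matching(baseline, second_net, clean, composited):
    """Train the second network so that its activations on object+background
    images match the frozen baseline's activations on background-free images,
    forcing it to ignore the background."""
    with torch.no_grad():
        target = baseline(clean)            # activations without background
    pred = second_net(composited)           # same objects, embedded in background
    return F.mse_loss(pred, target)

# toy usage; step 3 (not shown) freezes these layers and trains a
# cross-entropy classifier head on images with background
baseline = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3, padding=1), torch.nn.ReLU())
second = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3, padding=1), torch.nn.ReLU())
loss = step2_activation_matching(baseline, second,
                                 torch.randn(2, 3, 32, 32), torch.randn(2, 3, 32, 32))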
Pillai, Manu S; Bhattacharya, Abhijeet; Baweja, Tanmay; Gupta, Rohit; Shah, Mubarak
DEEPSAR: Vessel Detection In SAR Imagery With Noisy Labels Conference
IEEE International Conference on Image Processing, 2022.
Tags: ICIP | Links:
@conference{Pillai2023,
title = {DEEPSAR: Vessel Detection In SAR Imagery With Noisy Labels},
author = {Manu S Pillai and Abhijeet Bhattacharya and Tanmay Baweja and Rohit Gupta and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/ICIP_Submission.pdf},
year = {2022},
date = {2022-10-16},
urldate = {2023-10-08},
publisher = {IEEE International Conference on Image Processing},
keywords = {ICIP},
pubstate = {published},
tppubtype = {conference}
}
Kini, Jyoti; Shah, Mubarak
Tag-Based Attention Guided Bottom-Up Approach for Video Instance Segmentation Conference
26th International Conference on Pattern Recognition, 2022.
Tags: ICPR, Video Instance Segmentation | Links:
@conference{Kini2022b,
title = {Tag-Based Attention Guided Bottom-Up Approach for Video Instance Segmentation},
author = {Jyoti Kini and Mubarak Shah },
url = {https://arxiv.org/pdf/2204.10765.pdf},
year = {2022},
date = {2022-08-21},
urldate = {2022-08-21},
booktitle = {26th International Conference on Pattern Recognition},
issue = {arxiv:2204.10765},
keywords = {ICPR, Video Instance Segmentation},
pubstate = {published},
tppubtype = {conference}
}
Ristea, Nicolae-Catalin; Madan, Neelu; Ionescu, Radu Tudor; Nasrollahi, Kamal; Khan, Fahad Shahbaz; Moeslund, Thomas B.; Shah, Mubarak
Self-Supervised Predictive Convolutional Attentive Block for Anomaly Detection Conference
IEEE Computer Vision and Pattern Recognition, 2022.
Tags: Anomaly Detection, CVPR, Self-Supervised Learning, Self-Supervision | Links:
@conference{nokey,
title = {Self-Supervised Predictive Convolutional Attentive Block for Anomaly Detection},
author = {Nicolae-Catalin Ristea and Neelu Madan and Radu Tudor Ionescu and Kamal Nasrollahi and Fahad Shahbaz Khan and Thomas B. Moeslund and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/SSPCAB_camera-arxiv.pdf},
year = {2022},
date = {2022-06-19},
urldate = {2022-06-19},
publisher = {IEEE Computer Vision and Pattern Recognition},
keywords = {Anomaly Detection, CVPR, Self-Supervised Learning, Self-Supervision},
pubstate = {published},
tppubtype = {conference}
}
Karim, Nazmul; Rizve, Mamshad Nayeem; Rahnavard, Nazanin; Mian, Ajmal; Shah, Mubarak
UNICON: Combating Label Noise Through Uniform Selection and Contrastive Learning Conference
IEEE Computer Vision and Pattern Recognition, 2022.
Tags: Contrastive Learning, CVPR, Noisy Labels, Semi-supervised learning | Links:
@conference{nokey,
title = {UNICON: Combating Label Noise Through Uniform Selection and Contrastive Learning},
author = {Nazmul Karim and Mamshad Nayeem Rizve and Nazanin Rahnavard and Ajmal Mian and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/07363.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/07363-supp.pdf
https://github.com/nazmul-karim170/unicon-noisy-label},
year = {2022},
date = {2022-06-19},
urldate = {2022-06-19},
publisher = {IEEE Computer Vision and Pattern Recognition},
keywords = {Contrastive Learning, CVPR, Noisy Labels, Semi-supervised learning},
pubstate = {published},
tppubtype = {conference}
}
Acsintoae, Andra; Florescu, Andrei; Georgescu, Mariana-Iuliana; Mare, Tudor; Sumedrea, Paul; Ionescu, Radu Tudor; Khan, Fahad Shahbaz; Shah, Mubarak
UBnormal: New Benchmark for Supervised Open-Set Video Anomaly Detection Conference
IEEE Computer Vision and Pattern Recognition, 2022.
Tags: Anomaly Detection, CVPR, Dataset | Links:
@conference{nokey,
title = {UBnormal: New Benchmark for Supervised Open-Set Video Anomaly Detection},
author = {Andra Acsintoae and Andrei Florescu and Mariana-Iuliana Georgescu and Tudor Mare and Paul Sumedrea and Radu Tudor Ionescu and Fahad Shahbaz Khan and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/04315.pdf
https://github.com/lilygeorgescu/UBnormal},
year = {2022},
date = {2022-06-19},
urldate = {2022-06-19},
publisher = {IEEE Computer Vision and Pattern Recognition},
keywords = {Anomaly Detection, CVPR, Dataset},
pubstate = {published},
tppubtype = {conference}
}
Dave, Ishan Rajendrakumar; Chen, Chen; Shah, Mubarak
SPAct: Self-supervised Privacy Preservation for Action Recognition Conference
IEEE Computer Vision and Pattern Recognition, 2022.
Tags: Action Recognition, CVPR, Privacy Preservation | Links:
@conference{nokey,
title = {SPAct: Self-supervised Privacy Preservation for Action Recognition},
author = {Ishan Rajendrakumar Dave and Chen Chen and Mubarak Shah},
url = {https://arxiv.org/pdf/2203.15205.pdf
https://github.com/DAVEISHAN/SPAct
https://www.youtube.com/watch?v=_PAlMT7ozts},
year = {2022},
date = {2022-06-19},
urldate = {2022-06-19},
publisher = {IEEE Computer Vision and Pattern Recognition},
keywords = {Action Recognition, CVPR, Privacy Preservation},
pubstate = {published},
tppubtype = {conference}
}
Kumar, Akash; Rawat, Yogesh Singh
End-to-End Semi-Supervised Learning for Video Action Detection Conference
IEEE Computer Vision and Pattern Recognition, 2022.
Tags: CVPR, Semi-supervised learning | Links:
@conference{nokey,
title = {End-to-End Semi-Supervised Learning for Video Action Detection},
author = {Akash Kumar and Yogesh Singh Rawat},
url = {https://arxiv.org/pdf/2203.04251.pdf},
year = {2022},
date = {2022-06-19},
urldate = {2022-06-19},
publisher = {IEEE Computer Vision and Pattern Recognition},
keywords = {CVPR, Semi-supervised learning},
pubstate = {published},
tppubtype = {conference}
}
Mendieta, Matias; Yang, Taojiannan; Wang, Pu; Lee, Minwoo; Ding, Zhengming; Chen, Chen
Local Learning Matters: Rethinking Data Heterogeneity in Federated Learning Conference
IEEE Computer Vision and Pattern Recognition, 2022.
Tags: CVPR, Federated Learning | Links:
@conference{nokey,
title = {Local Learning Matters: Rethinking Data Heterogeneity in Federated Learning},
author = {Matias Mendieta and Taojiannan Yang and Pu Wang and Minwoo Lee and Zhengming Ding and Chen Chen},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/11405.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/11405_supp.pdf},
year = {2022},
date = {2022-06-19},
urldate = {2022-06-19},
publisher = {IEEE Computer Vision and Pattern Recognition},
keywords = {CVPR, Federated Learning},
pubstate = {published},
tppubtype = {conference}
}
Zhu, Sijie; Shah, Mubarak; Chen, Chen
TransGeo: Transformer Is All You Need for Cross-view Image Geo-localization Conference
IEEE Computer Vision and Pattern Recognition, 2022.
Tags: Cross-View, CVPR, Geo-Localization, Transformers | Links:
@conference{nokey,
title = {TransGeo: Transformer Is All You Need for Cross-view Image Geo-localization},
author = {Sijie Zhu and Mubarak Shah and Chen Chen},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/11695.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/11695-supp.pdf},
year = {2022},
date = {2022-06-19},
urldate = {2022-06-19},
publisher = {IEEE Computer Vision and Pattern Recognition},
keywords = {Cross-View, CVPR, Geo-Localization, Transformers},
pubstate = {published},
tppubtype = {conference}
}
Cao, Jiale; Pang, Yanwei; Anwer, Rao Muhammad; Cholakkal, Hisham; Xie, Jin; Shah, Mubarak; Khan, Fahad Shahbaz
PSTR: End-to-End One-Step Person Search With Transformers Conference
IEEE Computer Vision and Pattern Recognition, 2022.
Abstract | Tags: CVPR, Re-Identification, Transformers, Visual Search | Links:
@conference{nokey,
title = {PSTR: End-to-End One-Step Person Search With Transformers},
author = {Jiale Cao and Yanwei Pang and Rao Muhammad Anwer and Hisham Cholakkal and Jin Xie and Mubarak Shah and Fahad Shahbaz Khan},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/05237-2.pdf
https://github.com/JialeCao001/PSTR},
year = {2022},
date = {2022-06-19},
urldate = {2022-06-19},
publisher = {IEEE Computer Vision and Pattern Recognition},
abstract = {We propose a novel one-step transformer-based person search framework, PSTR, that jointly performs person detection and re-identification (re-id) in a single architecture. PSTR comprises a person search-specialized (PSS) module that contains a detection encoder-decoder for person detection along with a discriminative re-id decoder for person re-id. The discriminative re-id decoder utilizes a multi-level supervision scheme with a shared decoder for discriminative re-id feature learning and also comprises a part attention block to encode relationship between different parts of a person. We further introduce a simple multi-scale scheme to support re-id across person instances at different scales. PSTR jointly achieves the diverse objectives of object-level recognition (detection) and instance-level matching (re-id). To the best of our knowledge, we are the first to propose an end-to-end one-step transformer-based person search framework. Experiments are performed on two popular benchmarks: CUHK-SYSU and PRW. Our extensive ablations reveal the merits of the proposed contributions. Further, the proposed PSTR sets a new state-of-the-art on both benchmarks. On the challenging PRW benchmark, PSTR achieves a mean average precision (mAP) score of 56.5%. The source code is available at https://github.com/JialeCao001/PSTR.},
keywords = {CVPR, Re-Identification, Transformers, Visual Search},
pubstate = {published},
tppubtype = {conference}
}
Gupta, Akshita; Narayan, Sanath; Joseph, K J; Khan, Salman; Khan, Fahad Shahbaz; Shah, Mubarak
OW-DETR: Open-world Detection Transformer Conference
IEEE Computer Vision and Pattern Recognition, 2022.
Abstract | Tags: CVPR, Object Detection, Open World, Transformers | Links:
@conference{nokey,
title = {OW-DETR: Open-world Detection Transformer},
author = {Akshita Gupta and Sanath Narayan and K J Joseph and Salman Khan and Fahad Shahbaz Khan and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/03815.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/03815-supp.pdf
https://github.com/akshitac8/OW-DETR.},
year = {2022},
date = {2022-06-19},
urldate = {2022-06-19},
publisher = {IEEE Computer Vision and Pattern Recognition},
abstract = {Open-world object detection (OWOD) is a challenging computer vision problem, where the task is to detect a known set of object categories while simultaneously identifying unknown objects. Additionally, the model must incrementally learn new classes that become known in the next training episodes. Distinct from standard object detection, the OWOD setting poses significant challenges for generating quality candidate proposals on potentially unknown objects, separating the unknown objects from the background and detecting diverse unknown objects. Here, we introduce a novel end-to-end transformer-based framework, OW-DETR, for open-world object detection. The proposed OW-DETR comprises three dedicated components, namely attention-driven pseudo-labeling, novelty classification and objectness scoring, to explicitly address the aforementioned OWOD challenges. Our OW-DETR explicitly encodes multi-scale contextual information, possesses less inductive bias, enables knowledge transfer from known classes to the unknown class and can better discriminate between unknown objects and background. Comprehensive experiments are performed on two benchmarks: MS-COCO and PASCAL VOC. The extensive ablations reveal the merits of our proposed contributions. Further, our model outperforms the recently introduced OWOD approach, ORE, with absolute gains ranging from 1.8% to 3.3% in terms of unknown recall on MS-COCO. In the case of incremental object detection, OW-DETR outperforms the state-of-the-art for all settings on PASCAL VOC. Our code is available at https://github.com/akshitac8/OW-DETR.},
keywords = {CVPR, Object Detection, Open World, Transformers},
pubstate = {published},
tppubtype = {conference}
}
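The attention-driven pseudo-labeling step named in the abstract lends itself to a compact illustration: score each proposal that matched no ground-truth box by the backbone activation it covers, and promote the top scorers to "unknown" pseudo-labels. The sketch below is a loose reading of that idea, not the OW-DETR implementation; the box format, the channel-averaged activation map, and the top-k cutoff are all assumptions.

```python
# Hedged sketch of attention-driven pseudo-labeling (assumptions noted above).
import torch

def unknown_pseudo_labels(feat_map, boxes, k=5):
    """feat_map: (H, W) channel-averaged backbone activation map;
    boxes: (N, 4) unmatched proposals as integer (x1, y1, x2, y2)."""
    # mean activation inside each box acts as an "objectness" score
    scores = torch.stack([
        feat_map[y1:y2, x1:x2].mean() for x1, y1, x2, y2 in boxes.tolist()
    ])
    # highest-scoring boxes become pseudo-labels for the unknown class
    return boxes[scores.topk(min(k, len(boxes))).indices]
```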
Dave, Ishan; Gupta, Rohit; Rizve, Mamshad Nayeem; Shah, Mubarak
TCLR: Temporal Contrastive Learning for Video Representation Journal Article
In: Computer Vision and Image Understanding, vol. 219, iss. 1077-3142, pp. 103406, 2022.
Abstract | Tags: Self-Supervised Learning | Links:
@article{nokey,
title = {TCLR: Temporal Contrastive Learning for Video Representation},
author = {Ishan Dave and Rohit Gupta and Mamshad Nayeem Rizve and Mubarak Shah },
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/1-s2.0-S1077314222000376-main.pdf
https://github.com/DAVEISHAN/TCLR},
doi = {10.1016/j.cviu.2022.103406},
year = {2022},
date = {2022-05-01},
urldate = {2022-05-01},
journal = {Computer Vision and Image Understanding},
volume = {219},
issue = {1077-3142},
pages = {103406},
abstract = {Contrastive learning has nearly closed the gap between supervised and self-supervised learning of image representations, and has also been explored for videos. However, prior work on contrastive learning for video data has not explored the effect of explicitly encouraging the features to be distinct across the temporal dimension. We develop a new temporal contrastive learning framework consisting of two novel losses to improve upon existing contrastive self-supervised video representation learning methods. The local–local temporal contrastive loss adds the task of discriminating between non-overlapping clips from the same video, whereas the global–local temporal contrastive aims to discriminate between timesteps of the feature map of an input clip in order to increase the temporal diversity of the learned features. Our proposed temporal contrastive learning framework achieves significant improvement over the state-of-the-art results in various downstream video understanding tasks such as action recognition, limited-label action classification, and nearest-neighbor video retrieval on multiple video datasets and backbones. We also demonstrate significant improvement in fine-grained action classification for visually similar classes. With the commonly used 3D ResNet-18 architecture with UCF101 pretraining, we achieve 82.4% (+5.1% increase over the previous best) top-1 accuracy on UCF101 and 52.9% (+5.4% increase) on HMDB51 action classification, and 56.2% (+11.7% increase) Top-1 Recall on UCF101 nearest neighbor video retrieval. Code released at https://github.com/DAVEISHAN/TCLR.},
keywords = {Self-Supervised Learning},
pubstate = {published},
tppubtype = {article}
}
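For readers who want the gist of the local-local loss in code: the positive for each clip is the other augmented view of the same clip, and the remaining non-overlapping clips of the same video serve as negatives. This is a minimal sketch under assumed tensor shapes and temperature; it is not the released TCLR code (see https://github.com/DAVEISHAN/TCLR for that).

```python
# Hedged sketch of a "local-local" temporal contrastive (NT-Xent style) loss:
# two augmented views of the same clip attract, while other non-overlapping
# clips of the SAME video repel. Shapes and tau are illustrative assumptions.
import torch
import torch.nn.functional as F

def local_local_loss(z1, z2, tau=0.1):
    """z1, z2: (V, T, D) clip embeddings for two augmentations of
    V videos, each cut into T non-overlapping clips."""
    V, T, D = z1.shape
    z1 = F.normalize(z1, dim=-1)
    z2 = F.normalize(z2, dim=-1)
    # similarity between every clip of view 1 and every clip of view 2, per video
    sim = torch.einsum('vtd,vsd->vts', z1, z2) / tau          # (V, T, T)
    # the matching clip index is the positive; same-video clips are negatives
    labels = torch.arange(T, device=z1.device).expand(V, T)   # (V, T)
    return F.cross_entropy(sim.reshape(V * T, T), labels.reshape(V * T))
```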
Kini, Jyoti; Khan, Fahad Shahbaz; Khan, Salman; Shah, Mubarak
Self-Supervised Video Object Segmentation via Cutout Prediction and Tagging Technical Report
no. arXiv:2204.10846, 2022.
Tags: Self-Supervised Learning, Video Object Segmentation | Links:
@techreport{Kini2022,
title = {Self-Supervised Video Object Segmentation via Cutout Prediction and Tagging},
author = {Jyoti Kini and Fahad Shahbaz Khan and Salman Khan and Mubarak Shah},
url = {https://arxiv.org/pdf/2204.10846.pdf},
year = {2022},
date = {2022-04-24},
urldate = {2022-04-24},
number = {arXiv:2204.10846},
keywords = {Self-Supervised Learning, Video Object Segmentation},
pubstate = {published},
tppubtype = {techreport}
}
Modi, Rajat; Rana, Aayush Jung; Kumar, Akash; Tirupattur, Praveen; Vyas, Shruti; Rawat, Yogesh Singh; Shah, Mubarak
Video Action Detection: Analysing Limitations and Challenges Conference
IEEE Computer Vision and Pattern Recognition, 2022.
@conference{Modi2022,
title = {Video Action Detection: Analysing Limitations and Challenges},
author = {Rajat Modi and Aayush Jung Rana and Akash Kumar and Praveen Tirupattur and Shruti Vyas and Yogesh Singh Rawat and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/2204.07892.pdf},
year = {2022},
date = {2022-04-17},
urldate = {2022-04-17},
publisher = {IEEE Computer Vision and Pattern Recognition},
keywords = {},
pubstate = {published},
tppubtype = {conference}
}
Hassan, Shah; Jiban, Md Jibanul Haque; Mahalanobis, Abhijit
Performance Evaluation of Boosted 2-stream TCRNet Conference
International Congress on Information and Communication Technology, 2022.
Tags: ICICT | Links:
@conference{nokey,
title = {Performance Evaluation of Boosted 2-stream TCRNet},
author = {Shah Hassan and Md Jibanul Haque Jiban and Abhijit Mahalanobis},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/paper.pdf},
year = {2022},
date = {2022-03-02},
urldate = {2022-03-02},
publisher = {International Congress on Information and Communication Technology},
keywords = {ICICT},
pubstate = {published},
tppubtype = {conference}
}
Pestana, Camilo; Akhtar, Naveed; Rahnavard, Nazanin; Shah, Mubarak; Mian, Ajmal
Transferable 3D Adversarial Textures using End-to-end Optimization Conference
IEEE/CVF Winter Conference on Applications of Computer Vision (WACV), 2022.
Abstract | Tags: Adversarial Attacks, WACV | Links:
@conference{Pestana2022,
title = {Transferable 3D Adversarial Textures using End-to-end Optimization},
author = {Camilo Pestana and Naveed Akhtar and Nazanin Rahnavard and Mubarak Shah and Ajmal Mian},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/WACV_2022_Deceptive_Textures-1.pdf},
doi = {10.1109/WACV51458.2022.00080},
year = {2022},
date = {2022-02-15},
urldate = {2022-02-15},
pages = {727-736},
publisher = {IEEE/CVF Winter Conference on Applications of Computer Vision (WACV)},
abstract = {Deep visual models are known to be vulnerable to adversarial attacks. The last few years have seen numerous techniques to compute adversarial inputs for these models. However, there are still under-explored avenues in this critical research direction. Among those is the estimation of adversarial textures for 3D models in an end-to-end optimization scheme. In this paper, we propose such a scheme to generate adversarial textures for 3D models that are highly transferable and invariant to different camera views and lighting conditions. Our method makes use of neural rendering with explicit control over the model texture and background. We ensure transferability of the adversarial textures by employing an ensemble of robust and non-robust models. Our technique utilizes 3D models as a proxy to simulate closer to real-life conditions, in contrast to conventional use of 2D images for adversarial attacks. We show the efficacy of our method with extensive experiments.},
keywords = {Adversarial Attacks, WACV},
pubstate = {published},
tppubtype = {conference}
}
Aafaq, Nayyer; Mian, Ajmal; Akhtar, Naveed; Liu, Wei; Shah, Mubarak
Dense Video Captioning with Early Linguistic Information Fusion Journal Article
In: IEEE Transactions on Multimedia, pp. 1-1, 2022.
Abstract | Tags: Adversarial Attacks | Links:
@article{nokey,
title = {Dense Video Captioning with Early Linguistic Information Fusion},
author = {Nayyer Aafaq and Ajmal Mian and Naveed Akhtar and Wei Liu and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/IEEE_TMM_Nayyer_Jan_2022_Final_Version_Manuscript.pdf},
doi = {10.1109/TMM.2022.3146005},
year = {2022},
date = {2022-01-25},
urldate = {2022-01-25},
journal = {IEEE Transactions on Multimedia},
pages = {1-1},
abstract = {Dense captioning methods generally detect events in videos first and then generate captions for the individual events. Events are localized solely based on the visual cues while ignoring the associated linguistic information and context. Whereas end-to-end learning may implicitly take guidance from language, these methods still fall short of the power of explicit modeling. In this paper, we propose a Visual-Semantic Embedding (ViSE) Framework that models the word(s)-context distributional properties over the entire semantic space and computes weights for all the n-grams such that higher weights are assigned to the more informative n-grams. The weights are accounted for in learning distributed representations of all the captions to construct a semantic space. To perform the contextualization of visual information and the constructed semantic space in a supervised manner, we design Visual-Semantic Joint Modeling Network (VSJM-Net). The learned ViSE embeddings are then temporally encoded with a Hierarchical Descriptor Transformer (HDT). For caption generation, we exploit a transformer architecture to decode the input embeddings into natural language descriptions. Experiments on the large-scale ActivityNet Captions dataset and YouCook-II dataset demonstrate the efficacy of our method.},
keywords = {Adversarial Attacks},
pubstate = {published},
tppubtype = {article}
}
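The n-gram weighting idea is easiest to see with a classical stand-in: TF-IDF also assigns higher weights to more informative n-grams. The snippet below is only an analogy for intuition; ViSE learns its own distributional weighting scheme, which TF-IDF does not reproduce.

```python
# TF-IDF as a hedged stand-in for "weight informative n-grams", not ViSE itself.
from sklearn.feature_extraction.text import TfidfVectorizer

captions = [
    "a man slices an onion on a cutting board",
    "a man chops vegetables in a kitchen",
]
vec = TfidfVectorizer(ngram_range=(1, 3))    # unigrams through trigrams
weights = vec.fit_transform(captions)        # sparse (n_captions, n_ngrams) weights
# show the highest-weighted n-grams of the first caption
print(sorted(zip(vec.get_feature_names_out(), weights.toarray()[0]),
             key=lambda t: -t[1])[:5])
```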
Kardan, Navid; Hill, Mitchell; Shah, Mubarak
Self-Joint Supervised Learning Conference
International Conference on Learning Representations (ICLR), 2022.
Abstract | Tags: ICLR | Links:
@conference{Kardan2022,
title = {Self-Joint Supervised Learning},
author = {Navid Kardan and Mitchell Hill and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Self_joint_ICLR-002.pdf
https://github.com/ndkn/Self-joint-Learning},
year = {2022},
date = {2022-01-20},
urldate = {2022-01-20},
publisher = {International Conference on Learning Representations (ICLR)},
abstract = {Supervised learning is a fundamental framework used to train machine learning systems. A supervised learning problem is often formulated using an i.i.d. assumption that restricts model attention to a single relevant signal at a time when predicting. This contrasts with the human ability to actively use related samples as reference when making decisions. We hypothesize that the restriction to a single signal for each prediction in the standard i.i.d. framework contributes to well-known drawbacks of supervised learning: making overconfident predictions and vulnerability to overfitting, adversarial attacks, and out-of-distribution data. To address these limitations, we propose a new supervised learning paradigm called self-joint learning that generalizes the standard approach by modeling the joint conditional distribution of two observed samples, where each sample is an image and its label. Rather than assuming samples are independent, our models explicitly learn the sample-to-sample relation of conditional independence. Our framework can naturally incorporate auxiliary unlabeled data to further improve the performance. Experiments on benchmark image datasets show our method offers significant improvement over standard supervised learning in terms of accuracy, robustness against adversarial attacks, out-of-distribution detection, and overconfidence mitigation. Code: github.com/ndkn/Self-joint-Learning},
keywords = {ICLR},
pubstate = {published},
tppubtype = {conference}
}
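The core move, modeling the joint conditional distribution of two labeled samples, can be sketched as classifying a pair of images over K x K joint labels. The concatenation fusion and linear head below are illustrative assumptions, not the paper's architecture (see github.com/ndkn/Self-joint-Learning for the actual code).

```python
# Hedged sketch of pairwise "joint label" classification (assumptions above).
import torch
import torch.nn as nn

class SelfJointHead(nn.Module):
    def __init__(self, backbone, feat_dim, num_classes):
        super().__init__()
        self.backbone = backbone                       # maps images to (B, feat_dim), assumed
        self.joint = nn.Linear(2 * feat_dim, num_classes ** 2)
        self.num_classes = num_classes

    def forward(self, xa, xb):
        fa, fb = self.backbone(xa), self.backbone(xb)
        return self.joint(torch.cat([fa, fb], dim=1))  # logits over K*K joint labels

def joint_targets(ya, yb, num_classes):
    # joint label index for the pair (ya, yb); train with F.cross_entropy
    return ya * num_classes + yb
```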
Fioresi, Joseph; Colvin, Dylan J.; Frota, Rafaela; Gupta, Rohit; Li, Mengjie; Seigneur, Hubert P.; Vyas, Shruti; Oliveira, Sofia; Shah, Mubarak; Davis, Kristopher O.
Automated Defect Detection and Localization in Photovoltaic Cells Using Semantic Segmentation of Electroluminescence Images Journal Article
In: IEEE Journal of Photovoltaics, vol. 12, no. 1, pp. 53-61, 2022.
Abstract | Tags: REU, Semantic Segmentation, Solar Cells | Links:
@article{Fioresi2022,
title = {Automated Defect Detection and Localization in Photovoltaic Cells Using Semantic Segmentation of Electroluminescence Images},
author = {Joseph Fioresi and Dylan J. Colvin and Rafaela Frota and Rohit Gupta and Mengjie Li and Hubert P. Seigneur and Shruti Vyas and Sofia Oliveira and Mubarak Shah and Kristopher O. Davis},
url = {https://ieeexplore.ieee.org/document/9650542},
doi = {10.1109/JPHOTOV.2021.3131059},
year = {2022},
date = {2022-01-01},
urldate = {2022-01-01},
journal = {IEEE Journal of Photovoltaics},
volume = {12},
number = {1},
pages = {53-61},
abstract = {In this article, we propose a deep learning based semantic segmentation model that identifies and segments defects in electroluminescence (EL) images of silicon photovoltaic (PV) cells. The proposed model can differentiate between cracks, contact interruptions, cell interconnect failures, and contact corrosion for both multicrystalline and monocrystalline silicon cells. Our model utilizes a segmentation Deeplabv3 model with a ResNet-50 backbone. It was trained on 17,064 EL images including 256 physically realistic simulated images of PV cells generated to deal with class imbalance. While performing semantic segmentation for five defect classes, this model achieves a weighted F1-score of 0.95, an unweighted F1-score of 0.69, a pixel-level global accuracy of 95.4%, and a mean intersection over union score of 57.3%. In addition, we introduce the UCF EL Defect dataset, a large-scale dataset consisting of 17,064 EL images, which will be publicly available for use by the PV and computer vision research communities.},
keywords = {REU, Semantic Segmentation, Solar Cells},
pubstate = {published},
tppubtype = {article}
}
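Since the abstract names the exact model family, a minimal instantiation is easy to show with torchvision. The grayscale-to-3-channel handling and input size are assumptions; the five-class output follows the abstract's five defect classes.

```python
# Minimal sketch: DeepLabv3 with a ResNet-50 backbone, five output classes.
import torch
from torchvision.models.segmentation import deeplabv3_resnet50

model = deeplabv3_resnet50(weights=None, num_classes=5)
model.eval()                              # deterministic forward for the demo
el_batch = torch.randn(2, 3, 512, 512)    # EL images replicated to 3 channels (assumed)
with torch.no_grad():
    out = model(el_batch)['out']          # (2, 5, 512, 512) per-pixel class logits
```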
2021
Kerrigan, Alec; Duarte, Kevin; Rawat, Yogesh Singh; Shah, Mubarak
Reformulating Zero-shot Action Recognition for Multi-label Actions Conference
Thirty-fifth Conference on Neural Information Processing Systems, 2021.
Tags: Action Recognition, NeurIPS, Zero-Shot Learning | Links:
@conference{Kerrigan2021,
title = {Reformulating Zero-shot Action Recognition for Multi-label Actions},
author = {Alec Kerrigan and Kevin Duarte and Yogesh Singh Rawat and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/reformulating_zero_shot_action2.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/ZSL-Supp.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Reformulating-Zero-shot-Action-Recognition-forMulti-label-Actions.pptx},
year = {2021},
date = {2021-12-06},
urldate = {2021-12-06},
booktitle = {Thirty-fifth Conference on Neural Information Processing Systems},
keywords = {Action Recognition, NeurIPS, Zero-Shot Learning},
pubstate = {published},
tppubtype = {conference}
}
Lei, Huan; Akhtar, Naveed; Shah, Mubarak; Mian, Ajmal
Geometric Feature Learning for 3D Meshes Journal Article
In: arXiv, 2021.
Abstract | Tags: | Links:
@article{nokey,
title = {Geometric Feature Learning for 3D Meshes},
author = {Huan Lei and Naveed Akhtar and Mubarak Shah and Ajmal Mian},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/2112.01801.pdf
https://github.com/EnyaHermite/Picasso},
year = {2021},
date = {2021-12-03},
journal = {arXiv},
abstract = {Geometric feature learning for 3D meshes is central to computer graphics and highly important for numerous vision applications. However, deep learning currently lags in hierarchical modeling of heterogeneous 3D meshes due to the lack of required operations and/or their efficient implementations. In this paper, we propose a series of modular operations for effective geometric deep learning over heterogeneous 3D meshes. These operations include mesh convolutions, (un)pooling and efficient mesh decimation. We provide open source implementation of these operations, collectively termed Picasso. The mesh decimation module of Picasso is GPU-accelerated, which can process a batch of meshes on-the-fly for deep learning. Our (un)pooling operations compute features for newly-created neurons across network layers of varying resolution. Our mesh convolutions include facet2vertex, vertex2facet, and facet2facet convolutions that exploit vMF mixture and Barycentric interpolation to incorporate fuzzy modelling. Leveraging the modular operations of Picasso, we contribute a novel hierarchical neural network, PicassoNet-II, to learn highly discriminative features from 3D meshes. PicassoNet-II accepts primitive geometrics and fine textures of mesh facets as input features, while processing full scene meshes. Our network achieves highly competitive performance for shape analysis and scene parsing on a variety of benchmarks. We release Picasso and PicassoNet-II on Github.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Duarte, Kevin
Capsule Networks for Video Understanding PhD Thesis
University of Central Florida, 2021.
Tags: Action Detection, Capsule Networks, Multi-Modal Learning, Ph.D. Dissertation, Text and Video | Links:
@phdthesis{Duarte2021b,
title = {Capsule Networks for Video Understanding},
author = {Kevin Duarte},
url = {https://www.crcv.ucf.edu/people/alumni/#:~:text=Capsule%20Networks%20for%20Video%20Understanding},
year = {2021},
date = {2021-12-01},
urldate = {2021-12-01},
school = {University of Central Florida},
keywords = {Action Detection, Capsule Networks, Multi-Modal Learning, Ph.D. Dissertation, Text and Video},
pubstate = {published},
tppubtype = {phdthesis}
}
Duarte, Kevin; Chen, Brian; Shvetsova, Nina; Rouditchenko, Andrew; Thomas, Samuel; Liu, Alexander; Harwath, David; Glass, James; Kuehne, Hilde; Shah, Mubarak
Routing with Self-Attention for Multimodal Capsule Networks Unpublished
arXiv preprint arXiv:2112.00775, 2021.
Abstract | Tags: Audio, Capsule Networks, Multi-Modal Learning, Text and Video | Links:
@unpublished{nokey,
title = {Routing with Self-Attention for Multimodal Capsule Networks},
author = {Kevin Duarte and Brian Chen and Nina Shvetsova and Andrew Rouditchenko and Samuel Thomas and Alexander Liu and David Harwath and James Glass and Hilde Kuehne and Mubarak Shah},
editor = {arXiv},
url = {https://arxiv.org/pdf/2112.00775.pdf
https://arxiv.org/abs/2112.00775},
doi = {10.48550/arXiv.2112.00775},
year = {2021},
date = {2021-12-01},
urldate = {2021-12-01},
abstract = {The task of multimodal learning has seen a growing interest recently as it allows for training neural architectures based on different modalities such as vision, text, and audio. One challenge in training such models is that they need to jointly learn semantic concepts and their relationships across different input representations. Capsule networks have been shown to perform well in context of capturing the relation between low-level input features and higher-level concepts. However, capsules have so far mainly been used only in small-scale fully supervised settings due to the resource demand of conventional routing algorithms. We present a new multimodal capsule network that allows us to leverage the strength of capsules in the context of a multimodal learning framework on large amounts of video data. To adapt the capsules to large-scale input data, we propose a novel routing by self-attention mechanism that selects relevant capsules which are then used to generate a final joint multimodal feature representation. This allows not only for robust training with noisy video data, but also to scale up the size of the capsule network compared to traditional routing methods while still being computationally efficient. We evaluate the proposed architecture by pretraining it on a large-scale multimodal video dataset and applying it on four datasets in two challenging downstream tasks. Results show that the proposed multimodal capsule network is not only able to improve results compared to other routing techniques, but also achieves competitive performance on the task of multimodal learning.},
howpublished = {arXiv preprint arXiv:2112.00775},
keywords = {Audio, Capsule Networks, Multi-Modal Learning, Text and Video},
pubstate = {published},
tppubtype = {unpublished}
}
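A rough sketch of routing by self-attention: run self-attention over the set of capsule vectors and pool them by the attention mass each capsule receives. Dimensions, head count, and the pooling rule are assumptions for illustration, not the paper's exact mechanism.

```python
# Hedged sketch of attention-based capsule routing (assumptions noted above).
import torch
import torch.nn as nn

class AttentionRouting(nn.Module):
    def __init__(self, dim, heads=4):
        super().__init__()
        self.attn = nn.MultiheadAttention(dim, heads, batch_first=True)

    def forward(self, capsules):                 # capsules: (B, N, dim)
        routed, weights = self.attn(capsules, capsules, capsules)
        # weight each capsule by the attention mass it received, then pool
        relevance = weights.mean(dim=1)          # (B, N) average mass per capsule
        return (routed * relevance.unsqueeze(-1)).sum(dim=1)   # (B, dim) joint feature
```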
Rajasegaran, Jathushan; Khan, Salman; Hayat, Munawar; Khan, Fahad Shahbaz; Shah, Mubarak
Meta-learning the Learning Trends Shared Across Tasks Conference
British Machine Vision Conference, Nov 22-25, 2021.
Tags: BMVC, Few-Shot Learning, Meta-Learning | Links:
@conference{Rajasegaran2021,
title = {Meta-learning the Learning Trends Shared Across Tasks},
author = {Jathushan Rajasegaran and Salman Khan and Munawar Hayat and Fahad Shahbaz Khan and Mubarak Shah},
url = {https://www.bmvc2021-virtualconference.com/conference/papers/paper_0874.html},
year = {2021},
date = {2021-11-22},
urldate = {2021-11-22},
booktitle = {British Machine Vision Conference, Nov 22-25},
keywords = {BMVC, Few-Shot Learning, Meta-Learning},
pubstate = {published},
tppubtype = {conference}
}
Rajasegaran, Jathushan; Khan, Salman; Hayat, Munawar; Khan, Fahad Shahbaz; Shah, Mubarak
Self-supervised Knowledge Distillation for Few-shot Learning Conference
British Machine Vision Conference, Nov 22-25, 2021.
Tags: BMVC, Few-Shot Learning, Knowledge Distillation, Self-Supervision | Links:
@conference{Rajasegaran2020,
title = {Self-supervised Knowledge Distillation for Few-shot Learning},
author = {Jathushan Rajasegaran and Salman Khan and Munawar Hayat and Fahad Shahbaz Khan and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2020/07/Publications_Self-supervised-Knowledge-Distillation-for-Few-shot-Learning.pdf
https://bmvc2021-virtualconference.com/conference/papers/paper_0820.html
https://github.com/brjathu/SKD},
year = {2021},
date = {2021-11-22},
urldate = {2021-11-22},
booktitle = {British Machine Vision Conference, Nov 22-25},
keywords = {BMVC, Few-Shot Learning, Knowledge Distillation, Self-Supervision},
pubstate = {published},
tppubtype = {conference}
}
Akhtar, Naveed; Mian, Ajmal; Kardan, Navid; Shah, Mubarak
Advances in Adversarial Attacks and Defenses in Computer Vision: A Survey Journal Article
In: IEEE Access, vol. 9, pp. 155161-155196, 2021.
Abstract | Tags: Adversarial Attacks | Links:
@article{Akhtar2021,
title = {Advances in Adversarial Attacks and Defenses in Computer Vision: A Survey},
author = {Naveed Akhtar and Ajmal Mian and Navid Kardan and Mubarak Shah},
editor = {IEEE Access},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Advances_in_Adversarial_Attacks_and_Defenses_in_Computer_Vision_A_Survey.pdf},
doi = {10.1109/ACCESS.2021.3127960},
year = {2021},
date = {2021-11-12},
urldate = {2021-11-12},
journal = {IEEE Access},
volume = {9},
pages = {155161-155196},
abstract = {Deep Learning is the most widely used tool in the contemporary field of computer vision. Its ability to accurately solve complex problems is employed in vision research to learn deep neural models for a variety of tasks, including security critical applications. However, it is now known that deep learning is vulnerable to adversarial attacks that can manipulate its predictions by introducing visually imperceptible perturbations in images and videos. Since the discovery of this phenomenon in 2013, it has attracted significant attention of researchers from multiple sub-fields of machine intelligence. In 2018, we published the first-ever review of the contributions made by the computer vision community in adversarial attacks on deep learning (and their defenses). Many of those contributions have inspired new directions in this area, which has matured significantly since witnessing the first generation methods. Hence, as a legacy sequel of our first literature survey, this review article focuses on the advances in this area since 2018. We thoroughly discuss the first generation attacks and comprehensively cover the modern attacks and their defenses appearing in the prestigious sources of computer vision and machine learning research. Besides offering the most comprehensive literature review of adversarial attacks and defenses to date, the article also provides concise definitions of technical terminologies for the non-experts. Finally, it discusses challenges and future outlook of this direction based on the literature since the advent of this research direction.},
keywords = {Adversarial Attacks},
pubstate = {published},
tppubtype = {article}
}
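As a concrete instance of the imperceptible perturbations the survey catalogs, the fast gradient sign method (one of the first-generation attacks it reviews) fits in a few lines; the epsilon of 8/255 is a conventional choice, not one taken from the survey.

```python
# Minimal FGSM sketch: one signed-gradient step on the input image.
import torch
import torch.nn.functional as F

def fgsm(model, x, y, eps=8 / 255):
    x = x.clone().requires_grad_(True)
    loss = F.cross_entropy(model(x), y)
    loss.backward()
    # perturb in the direction that increases the loss, stay in valid image range
    return (x + eps * x.grad.sign()).clamp(0, 1).detach()
```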
Xia, Haifeng; Jing, Taotao; Chen, Chen; Ding, Zhengming
Semi-supervised Domain Adaptive Retrieval via Discriminative Hashing Learning Conference
ACM Multimedia (ACM MM) (Oral), 2021.
Tags: ACM MM | Links:
@conference{nokey,
title = {Semi-supervised Domain Adaptive Retrieval via Discriminative Hashing Learning},
author = {Haifeng Xia and Taotao Jing and Chen Chen and Zhengming Ding},
url = {https://www.crcv.ucf.edu/chenchen/DHLing_MM_2021.pdf},
doi = {10.1145/3474085.3475526},
year = {2021},
date = {2021-10-20},
booktitle = {ACM Multimedia (ACM MM), 2021 (Oral)},
keywords = {ACM MM},
pubstate = {published},
tppubtype = {conference}
}
Zaeemzadeh, Alireza; Ghadar, Shabnam; Faieta, Baldo; Lin, Zhe; Rahnavard, Nazanin; Shah, Mubarak; Kalarot, Ratheesh
Face Image Retrieval with Attribute Manipulation Conference
International Conference on Computer Vision, 2021.
Abstract | Tags: ICCV, Style GAN, Visual Search | Links:
@conference{ZaeemzadehICCV2021,
title = {Face Image Retrieval with Attribute Manipulation},
author = {Alireza Zaeemzadeh and Shabnam Ghadar and Baldo Faieta and Zhe Lin and Nazanin Rahnavard and Mubarak Shah and Ratheesh Kalarot},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/06328.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/06328-supp.pdf},
year = {2021},
date = {2021-10-11},
urldate = {2021-10-11},
booktitle = {International Conference on Computer Vision},
abstract = {Current face image retrieval solutions are limited, since they treat different facial attributes the same and cannot incorporate user’s preference for a subset of attributes in their search criteria. This paper introduces a new face image retrieval framework, where the input face query is augmented by both an adjustment vector that specifies the desired modifications to the facial attributes, and a preference vector that assigns different levels of importance to different attributes. For example, a user can ask for retrieving images similar to a query image, but with a different hair color, and no preference for absence/presence of eyeglasses in the results. To achieve this, we propose to disentangle the semantics, corresponding to various attributes, by learning a set of sparse and orthogonal basis vectors in the latent space of StyleGAN. Such basis vectors are then employed to decompose the dissimilarity between face images in terms of dissimilarity between their attributes, assign preference to the attributes, and adjust the attributes in the query. Enforcing sparsity on the basis vectors helps us to disentangle the latent space and adjust each attribute independently from other attributes, while enforcing orthogonality facilitates preference assignment and the dissimilarity decomposition. The effectiveness of our approach is illustrated by achieving state-of-the-art results for the face image retrieval task.},
keywords = {ICCV, Style GAN, Visual Search},
pubstate = {published},
tppubtype = {conference}
}
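The dissimilarity decomposition is straightforward to sketch once the attribute basis is given: project the latent difference onto the basis, weight per-attribute terms by user preference, and adjust the query along attribute directions. Everything below (shapes, names, the squared-term distance) is an illustrative assumption layered on the abstract's description.

```python
# Hedged sketch of preference-weighted attribute dissimilarity (assumptions above).
import torch

def preference_distance(w_query, w_gallery, B, pref):
    """w_*: (D,) StyleGAN latent codes; B: (K, D) one basis vector per attribute
    (learned, sparse, orthogonal in the paper; assumed given here);
    pref: (K,) importance weights in [0, 1] (0 = ignore that attribute)."""
    coeffs = B @ (w_query - w_gallery)    # (K,) per-attribute dissimilarity terms
    return torch.sum(pref * coeffs ** 2)

def adjust_query(w_query, B, adjustment):
    """Move the query along attribute directions, e.g. adjustment[k] = +1
    to increase attribute k."""
    return w_query + adjustment @ B
```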
Regmi, Krishna; Shah, Mubarak
Video Geo-Localization Employing Geo-Temporal Feature Learning and GPS Trajectory Smoothing Conference
International Conference on Computer Vision, 2021.
Abstract | Tags: Geo-Localization, ICCV, Transformers, Video Geo-localization | Links:
@conference{RegmiICCV2021,
title = {Video Geo-Localization Employing Geo-Temporal Feature Learning and GPS Trajectory Smoothing},
author = {Krishna Regmi and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/video_geolocalization_ICCV2021.pdf
https://github.com/kregmi/VTE},
year = {2021},
date = {2021-10-11},
urldate = {2021-10-11},
booktitle = {International Conference on Computer Vision},
abstract = {In this paper, we address the problem of video geolocalization by proposing a Geo-Temporal Feature Learning (GTFL) Network to simultaneously learn the discriminative features for the query video frames and the gallery images for estimating the geo-spatial trajectory of a query video. Based on a transformer encoder architecture, our GTFL model encodes query and gallery data separately, via two dedicated branches. The proposed GPS Loss and Clip Triplet Loss exploit the geographical and temporal proximity between the frames and the clips to jointly learn the query and the gallery features. We also propose a deep learning approach to trajectory smoothing by predicting the outliers in the estimated GPS positions and learning the offsets to smooth the trajectory. We build a large dataset from four different regions of the USA: New York, San Francisco, Berkeley and Bay Area, using BDD driving videos as query, and by collecting corresponding Google StreetView (GSV) Images for gallery. Extensive evaluations of the proposed method on this new dataset are provided. Code and dataset details are publicly available at https://github.com/kregmi/VTE.},
keywords = {Geo-Localization, ICCV, Transformers, Video Geo-localization},
pubstate = {published},
tppubtype = {conference}
}
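One way to picture how geographic proximity can supervise feature learning, loosely in the spirit of the losses named above (and not the paper's actual GPS Loss or Clip Triplet Loss): choose positives and negatives for a triplet loss by GPS distance. The radius, margin, and meter-space positions are assumptions.

```python
# Hedged sketch of a GPS-proximity triplet loss (assumptions noted above).
import torch
import torch.nn.functional as F

def gps_triplet_loss(q, g, q_gps, g_gps, near=25.0, margin=0.3):
    """q: (N, D) query-frame features; g: (M, D) gallery features;
    q_gps, g_gps: (N, 2), (M, 2) positions in meters (assumed pre-projected)."""
    d_geo = torch.cdist(q_gps, g_gps)        # (N, M) geographic distances
    pos = d_geo.argmin(dim=1)                # nearest gallery image per query
    d_feat = torch.cdist(F.normalize(q, dim=1), F.normalize(g, dim=1))
    # hardest gallery image farther than `near` meters serves as the negative
    neg = d_feat.masked_fill(d_geo <= near, float('inf')).argmin(dim=1)
    idx = torch.arange(q.size(0))
    return F.relu(d_feat[idx, pos] - d_feat[idx, neg] + margin).mean()
```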
Zheng, Ce; Zhu, Sijie; Mendieta, Matias; Yang, Taojiannan; Chen, Chen; Ding, Zhengming
3D Human Pose Estimation with Spatial and Temporal Transformers Conference
International Conference on Computer Vision, 2021.
Tags: Action Recognition, Human Pose Estimation, ICCV, Pose | Links:
@conference{nokey,
title = {3D Human Pose Estimation with Spatial and Temporal Transformers},
author = {Ce Zheng and Sijie Zhu and Matias Mendieta and Taojiannan Yang and Chen Chen and Zhengming Ding},
url = {https://arxiv.org/pdf/2103.10455.pdf},
year = {2021},
date = {2021-10-11},
urldate = {2021-10-11},
booktitle = {International Conference on Computer Vision},
keywords = {Action Recognition, Human Pose Estimation, ICCV, Pose},
pubstate = {published},
tppubtype = {conference}
}
Bhunia, Ankan Kumar; Khan, Salman; Cholakkal, Hisham; Anwer, Rao Muhammad; Khan, Fahad Shahbaz; Shah, Mubarak
Handwriting Transformers Conference
International Conference on Computer Vision, 2021.
Abstract | Tags: ICCV, Transformers | Links:
@conference{BhuniaICCV2021,
title = {Handwriting Transformers},
author = {Ankan Kumar Bhunia and Salman Khan and Hisham Cholakkal and Rao Muhammad Anwer and Fahad Shahbaz Khan and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Handwriting_Generation_ICCV21.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Handwriting_Generation_ICCV21_supp.pdf},
year = {2021},
date = {2021-10-11},
urldate = {2021-10-11},
booktitle = {International Conference on Computer Vision},
abstract = {We propose a novel transformer-based styled handwritten text image generation approach, HWT, that strives to learn both style-content entanglement as well as global and local writing style patterns. The proposed HWT captures the long and short range relationships within the style examples through a self-attention mechanism, thereby encoding both global and local style patterns. Further, the proposed transformer-based HWT comprises an encoder-decoder attention that enables style-content entanglement by gathering the style representation of each query character. To the best of our knowledge, we are the first to introduce a transformer-based generative network for styled handwritten text generation. Our proposed HWT generates realistic styled handwritten text images and significantly outperforms the state-of-the-art demonstrated through extensive qualitative, quantitative and human-based evaluations. The proposed HWT can handle arbitrary length of text and any desired writing style in a few-shot setting. Further, our HWT generalizes well to the challenging scenario where both words and writing style are unseen during training, generating realistic styled handwritten text images.},
keywords = {ICCV, Transformers},
pubstate = {published},
tppubtype = {conference}
}
Narayan, Sanath; Gupta, Akshita; Khan, Salman; Khan, Fahad Shahbaz; Shao, Ling; Shah, Mubarak
Discriminative Region-based Multi-Label Zero-Shot Learning Conference
International Conference on Computer Vision, 2021.
Abstract | Tags: ICCV, Transformers, Zero-Shot Learning | Links:
@conference{NarayanICCV2021,
title = {Discriminative Region-based Multi-Label Zero-Shot Learning},
author = {Sanath Narayan and Akshita Gupta and Salman Khan and Fahad Shahbaz Khan and Ling Shao and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/02617.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/02617-supp.pdf},
year = {2021},
date = {2021-10-11},
urldate = {2021-10-11},
booktitle = {International Conference on Computer Vision},
abstract = {Multi-label zero-shot learning (ZSL) is a more realistic counterpart of standard single-label ZSL since several objects can co-exist in a natural image. However, the occurrence of multiple objects complicates the reasoning and requires region-specific processing of visual features to preserve their contextual cues. We note that the best existing multi-label ZSL method takes a shared approach towards attending to region features with a common set of attention maps for all the classes. Such shared maps lead to diffused attention, which does not discriminatively focus on relevant locations when the number of classes is large. Moreover, mapping spatially-pooled visual features to the class semantics leads to inter-class feature entanglement, thus hampering the classification. Here, we propose an alternate approach towards region-based discriminability-preserving multi-label zero-shot classification. Our approach maintains the spatial resolution to preserve region-level characteristics and utilizes a bi-level attention module (BiAM) to enrich the features by incorporating both region and scene context information. The enriched region-level features are then mapped to the class semantics and only their class predictions are spatially pooled to obtain image-level predictions, thereby keeping the multi-class features disentangled. Our approach sets a new state of the art on two large-scale multi-label zero-shot benchmarks: NUS-WIDE and Open Images. On NUS-WIDE, our approach achieves an absolute gain of 6.9% mAP for ZSL, compared to the best published results. Source code is available at https://github.com/akshitac8/BiAM.},
keywords = {ICCV, Transformers, Zero-Shot Learning},
pubstate = {published},
tppubtype = {conference}
}
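The "pool predictions, not features" point is worth a tiny sketch: score each region against the class semantics first, then pool the scores spatially. Top-k mean pooling here is an illustrative choice; the paper's BiAM module and pooling details differ.

```python
# Hedged sketch of region-level scoring followed by spatial score pooling.
import torch

def region_then_pool(region_feats, class_embed, k=10):
    """region_feats: (B, R, D) enriched region features (k must not exceed R);
    class_embed: (C, D) class semantic vectors (e.g. word embeddings)."""
    scores = region_feats @ class_embed.t()    # (B, R, C) region-level logits
    topk = scores.topk(k, dim=1).values        # strongest k regions per class
    return topk.mean(dim=1)                    # (B, C) image-level predictions
```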
Chen, Brian; Rouditchenko, Andrew; Duarte, Kevin; Kuehne, Hilde; Thomas, Samuel; Boggust, Angie; Panda, Rameswar; Kingsbury, Brian; Feris, Rogerio; Harwatch, David; Glass, James; Picheny, Michael; Chang, Shih-Fu
Multimodal Clustering Networks for Self-supervised Learning from Unlabeled Videos Conference
International Conference on Computer Vision, 2021.
Abstract | Tags: ICCV, Multi-Modal Learning | Links:
@conference{ChenICCV2021,
title = {Multimodal Clustering Networks for Self-supervised Learning from Unlabeled Videos},
author = {Brian Chen and Andrew Rouditchenko and Kevin Duarte and Hilde Kuehne and Samuel Thomas and Angie Boggust and Rameswar Panda and Brian Kingsbury and Rogerio Feris and David Harwatch and James Glass and Michael Picheny and Shih-Fu Chang},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/02965.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/02965-supp.pdf},
year = {2021},
date = {2021-10-11},
urldate = {2021-10-11},
booktitle = {International Conference on Computer Vision},
abstract = {Multimodal self-supervised learning is getting more and more attention as it allows not only to train large networks without human supervision but also to search and retrieve data across various modalities. In this context, this paper proposes a framework that, starting from a pre-trained backbone, learns a common multimodal embedding space that, in addition to sharing representations across different modalities, enforces a grouping of semantically similar instances. To this end, we extend the concept of instance-level contrastive learning with a multimodal clustering step in the training pipeline to capture semantic similarities across modalities. The resulting embedding space enables retrieval of samples across all modalities, even from unseen datasets and different domains. To evaluate our approach, we train our model on the HowTo100M dataset and evaluate its zero-shot retrieval capabilities in two challenging domains, namely text-to-video retrieval, and temporal action localization, showing state-of-the-art results on four different datasets.},
keywords = {ICCV, Multi-Modal Learning},
pubstate = {published},
tppubtype = {conference}
}
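A compressed view of adding a clustering step to instance-level contrastive training: periodically cluster the joint embedding space, then treat centroid assignments as pseudo-labels that pull semantically similar samples together. The k-means centroids and hard-assignment cross-entropy are assumptions, not the paper's procedure.

```python
# Hedged sketch of a clustering loss on top of contrastive embeddings.
import torch
import torch.nn.functional as F

def clustering_loss(z, centroids, tau=0.1):
    """z: (B, D) embeddings from any modality; centroids: (K, D) obtained from
    a periodic k-means pass over the joint embedding space (assumed)."""
    z = F.normalize(z, dim=1)
    c = F.normalize(centroids, dim=1)
    logits = z @ c.t() / tau            # (B, K) similarity to each cluster
    assign = logits.argmax(dim=1)       # hard pseudo-label per sample
    # pulls each sample toward its assigned centroid, away from the others
    return F.cross_entropy(logits, assign)
```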
Swetha, Sirnam; Kuehne, Hilde; Rawat, Yogesh Singh; Shah, Mubarak
Unsupervised Discriminative Embedding for Sub-Action Learning in Complex Activities Conference
IEEE International Conference on Image Processing, 2021.
Tags: Action Recognition, ICIP, Un-supervised Learning | Links:
@conference{Swetha2021,
title = {Unsupervised Discriminative Embedding for Sub-Action Learning in Complex Activities},
author = {Sirnam Swetha and Hilde Kuehne and Yogesh Singh Rawat and Mubarak Shah },
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Unsupervised-Discriminative-Embedding-for-Sub-Action-Learning-in-Complex-Activities.pdf},
year = {2021},
date = {2021-09-19},
urldate = {2021-09-19},
booktitle = {IEEE International Conference on Image Processing},
keywords = {Action Recognition, ICIP, Un-supervised Learning},
pubstate = {published},
tppubtype = {conference}
}
Jiban, Md Jibanul Haque; Hassan, Shah; Mahalanobis, Abhijit
Two-Stream Boosted TCRNET for Range-Tolerant Infra-Red Target Detection Conference
IEEE Conference on Image Processing, 2021.
Abstract | Tags: ICIP | Links:
@conference{jiban2021icip,
title = {Two-Stream Boosted TCRNET for Range-Tolerant Infra-Red Target Detection},
author = {Md Jibanul Haque Jiban and Shah Hassan and Abhijit Mahalanobis},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Two-Stream_TCRNet__IEEE_ICIP2021.pdf},
year = {2021},
date = {2021-09-19},
publisher = {IEEE Conference on Image Processing},
abstract = {The detection of vehicular targets in infra-red imagery is a challenging task, both due to the relatively few pixels on target and the false alarms produced by the surrounding terrain clutter. It has been previously shown [1] that a relatively simple network (known as TCRNet) can outperform conventional deep CNNs for such applications by maximizing a target to clutter ratio (TCR) metric. In this paper, we introduce a new form of the network (referred to as TCRNet-2) that further improves the performance by first processing target and clutter information in two parallel channels and then combining them to optimize the TCR metric. We also show that the overall performance can be considerably improved by boosting the performance of a primary TCRNet-2 detector, with a secondary network that enhances discrimination between targets and clutter in the false alarm space of the primary network. We analyze the performance of the proposed networks using a publicly available data set of infra-red images of targets in natural terrain. It is shown that the TCRNet-2 and its boosted version yield considerably better performance than the original TCRNet over a wide range of distances, in both day and night conditions.},
keywords = {ICIP},
pubstate = {published},
tppubtype = {conference}
}
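The TCR metric that drives both TCRNet papers can be caricatured as a ratio of output energies: response on target chips up, response on clutter chips down. The objective below only conveys the shape of the idea, not the analytic TCRNet formulation.

```python
# Hedged sketch of a target-to-clutter ratio objective (not TCRNet's exact form).
import torch

def tcr_objective(net, target_chips, clutter_chips, eps=1e-8):
    """target_chips, clutter_chips: (Nt, C, H, W), (Nc, C, H, W) image chips."""
    t_energy = net(target_chips).pow(2).mean()    # response energy on targets
    c_energy = net(clutter_chips).pow(2).mean()   # response energy on clutter
    return -(t_energy / (c_energy + eps))         # minimize the negative ratio
```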
Arif, Maliha; Mahalanobis, Abhijit
Few Shot Learning for Infra-Red Object Recognition Using Analytically Designed Low Level Filters for Data Representation Conference
IEEE International Conference on Image Processing, 2021.
Abstract | Tags: ICIP | Links:
@conference{ArifICIP2021,
title = {Few Shot Learning for Infra-Red Object Recognition Using Analytically Designed Low Level Filters for Data Representation},
author = {Maliha Arif and Abhijit Mahalanobis},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/ICIP_2021__Sparse_Learning-Camera-Ready.pdf},
year = {2021},
date = {2021-09-19},
publisher = {IEEE International Conference on Image Processing},
abstract = {It is well known that deep convolutional neural networks (CNNs) generalize well over a large number of classes when ample training data is available. However, training with smaller datasets does not always achieve robust performance. In such cases, we show that using analytically derived filters in the lowest layer enables a network to achieve better performance than learning from scratch using a relatively small dataset. These class-agnostic filters represent the underlying manifold of the data space, and also generalize to new or unknown classes which may occur on the same manifold. This directly enables new classes to be learned with very few images by simply fine-tuning the final few layers of the network. We illustrate the advantages of our method using the publicly available set of infra-red images of vehicular ground targets. We compare a simple CNN trained using our method with transfer learning performed using the VGG-16 network, and show that when the number of training images is limited, the proposed approach not only achieves better results on the trained classes, but also outperforms a standard network for learning a new object class.},
keywords = {ICIP},
pubstate = {published},
tppubtype = {conference}
}
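The training recipe in the abstract (fixed analytic first-layer filters, fine-tune only the final layers) reduces to a few lines of PyTorch. The filter bank here stands in for the analytically derived eigen-filters, and the tiny head is illustrative, not the paper's network.

```python
# Hedged sketch: freeze an analytically derived first layer, train only the head.
import torch
import torch.nn as nn

def build_few_shot_net(filter_bank, num_classes):
    """filter_bank: (F, C, k, k) precomputed low-level filters (assumed given)."""
    F_, C, k, _ = filter_bank.shape
    first = nn.Conv2d(C, F_, k, padding=k // 2, bias=False)
    first.weight.data.copy_(filter_bank)
    first.weight.requires_grad = False      # the analytic filters stay fixed
    return nn.Sequential(
        first, nn.ReLU(),
        nn.AdaptiveAvgPool2d(4), nn.Flatten(),
        nn.Linear(F_ * 16, num_classes),    # only these layers are trained
    )
```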
Cuellar, Adam; Mahalanobis, Abhijit
Detection of Small Moving Ground Vehicles in Cluttered Terrain Using Infrared Video Imagery Conference
IEEE International Conference on Image Processing, 2021.
Abstract | Tags: ICIP | Links:
@conference{CuellarICIP2021,
title = {Detection of Small Moving Ground Vehicles in Cluttered Terrain Using Infrared Video Imagery},
author = {Adam Cuellar and Abhijit Mahalanobis},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/atc_icip.pdf},
year = {2021},
date = {2021-09-19},
publisher = {IEEE International Conference on Image Processing},
abstract = {The detection of small moving targets in cluttered infrared imagery remains a difficult and challenging task. Conventional image subtraction techniques with frame-to-frame registration yield very high false alarm rates. Furthermore, state of the art deep convolutional neural networks (DCNNs) such as YOLO and Mask R-CNN also do not work well for this application. We show however, that it is possible to train a CNN to detect moving targets in a stack of stabilized images by maximizing a target to clutter ratio (TCR) metric. This metric has been previously used for detecting relatively large stationary targets in single images, but not for the purposes of finding small moving targets using multiple frames. Referred to as moving target indicator network (MTINet), the proposed network does not rely on image subtraction, but instead uses depth-wise convolution to learn inter-frame temporal dependencies. We compare the performance of the MTINet to state of the art DCNNs and a statistical anomaly detection algorithm, and propose a combined approach that offers the benefits of both data-driven learning and statistical analysis.
Index Terms— Detection, Localization, Infrared, CNN },
keywords = {ICIP},
pubstate = {published},
tppubtype = {conference}
}
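The contrast the abstract draws, depth-wise convolution over a stack of stabilized frames instead of frame subtraction, is easy to make concrete. The sketch below is an illustrative layout, not the published MTINet architecture: frames are stacked as channels, a depth-wise convolution filters each frame independently, and a 1x1 convolution then learns the inter-frame temporal combination that subtraction would otherwise hand-code.

import torch
import torch.nn as nn

T = 8                                   # stabilized frames in the stack
stack = torch.randn(1, T, 128, 128)     # (batch, frames-as-channels, H, W)

depthwise = nn.Conv2d(T, T, kernel_size=3, padding=1, groups=T)  # per-frame filtering
pointwise = nn.Conv2d(T, 1, kernel_size=1)   # learned temporal mixing across frames

response = pointwise(depthwise(stack))  # (1, 1, 128, 128) moving-target response map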
Shiraz, Sarah; Regmi, Krishna; Vyas, Shruti; Rawat, Yogesh Singh; Shah, Mubarak
Novel View Video Prediction using Dual Representation Conference
IEEE International Conference on Image Processing, 2021.
Tags: Cross-View, ICIP, View Synthesis | Links:
@conference{Shiraz2021,
title = {Novel View Video Prediction using Dual Representation},
author = {Sarah Shiraz and Krishna Regmi and Shruti Vyas and Yogesh Singh Rawat and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Projects_Novel-View-Video-Prediction-using-dual-Representation.pdf
https://www.crcv.ucf.edu/research/projects/novel-view-video-prediction-using-dual-representation/},
year = {2021},
date = {2021-09-19},
urldate = {2021-09-19},
booktitle = {IEEE International Conference on Image Processing},
keywords = {Cross-View, ICIP, View Synthesis},
pubstate = {published},
tppubtype = {conference}
}
Ott, Aaron; Mazaheri, Amir; da Vitoria Lobo, Niels; Shah, Mubarak
Deep Photo Cropper and Enhancer Journal Article
In: CoRR, vol. abs/2008.00634, 2021.
Tags: Dataset, Image Enhancement, REU | Links:
@article{Ott2021,
title = {Deep Photo Cropper and Enhancer},
author = {Aaron Ott and Amir Mazaheri and Niels da Vitoria Lobo and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2020/08/Publications_DEEP-PHOTO-CROPPER-AND-ENHANCER.pdf},
year = {2021},
date = {2021-08-07},
urldate = {2021-08-07},
booktitle = {IEEE International Conference on Image Processing},
journal = {CoRR},
volume = {abs/2008.00634},
keywords = {Dataset, Image Enhancement, REU},
pubstate = {published},
tppubtype = {article}
}
Regmi, Krishna
Exploring Relationships Between Ground and Aerial Views by Synthesis and Matching PhD Thesis
University of Central Florida, 2021.
Tags: Cross-View, GAN, Geo-Localization, Ph.D. Dissertation, Video Geo-localization | Links:
@phdthesis{Regmi2021,
title = {Exploring Relationships Between Ground and Aerial Views by Synthesis and Matching},
author = {Krishna Regmi},
url = {https://stars.library.ucf.edu/etd2020/747/},
year = {2021},
date = {2021-08-02},
urldate = {2021-08-02},
school = {University of Central Florida},
keywords = {Cross-View, GAN, Geo-Localization, Ph.D. Dissertation, Video Geo-localization},
pubstate = {published},
tppubtype = {phdthesis}
}
Edraki, Marzieh
Implication of Manifold Assumption in Deep Learning Models for Computer Vision Applications PhD Thesis
University of Central Florida, 2021.
Tags: Capsules, Ph.D. Dissertation | Links:
@phdthesis{Edraki2021,
title = {Implication of Manifold Assumption in Deep Learning Models for Computer Vision Applications},
author = {Marzieh Edraki},
url = {https://stars.library.ucf.edu/etd2020/675/},
year = {2021},
date = {2021-08-01},
urldate = {2021-08-01},
school = {University of Central Florida},
keywords = {Capsules, Ph.D. Dissertation},
pubstate = {published},
tppubtype = {phdthesis}
}
Zhu, Sijie; Yang, Taojiannan; Chen, Chen
Visual Explanation for Deep Metric Learning Journal Article
In: IEEE Transactions on Image Processing, 2021.
@article{Zhu2021,
title = {Visual Explanation for Deep Metric Learning},
author = {Sijie Zhu and Taojiannan Yang and Chen Chen},
url = {https://arxiv.org/pdf/1909.12977.pdf},
year = {2021},
date = {2021-07-30},
urldate = {2021-07-30},
journal = {IEEE Transactions on Image Processing},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Wang, Wenxuan; Chen, Chen; Ding, Meng; Li, Jiangyun; Yu, Hong; Zha, Sen
TransBTS: Multimodal Brain Tumor Segmentation Using Transformer Conference
International Conference on Medical Image Computing and Computer Assisted Intervention (MICCAI), 2021.
Tags: MICCAI | Links:
@conference{Wang2021,
title = {TransBTS: Multimodal Brain Tumor Segmentation Using Transformer},
author = {Wenxuan Wang and Chen Chen and Meng Ding and Jiangyun Li and Hong Yu and Sen Zha},
url = {https://arxiv.org/pdf/2103.04430.pdf},
year = {2021},
date = {2021-06-26},
booktitle = {International Conference on Medical Image Computing and Computer Assisted Intervention (MICCAI)},
keywords = {MICCAI},
pubstate = {published},
tppubtype = {conference}
}
Arif, Maliha; Mahalanobis, Abhijit
Infrared Target Recognition Using Realistic Training Images Generated by Modifying Latent Features of an Encoder-Decoder Network Journal Article
In: IEEE Transactions on Aerospace and Electronic Systems, 2021.
Abstract | Tags: TAES | Links:
@article{ArifTAES2021,
title = {Infrared Target Recognition Using Realistic Training Images Generated by Modifying Latent Features of an Encoder-Decoder Network},
author = {Maliha Arif and Abhijit Mahalanobis},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/IEEE_TAES_Revised_double_Column.pdf},
doi = {10.1109/TAES.2021.3090921},
year = {2021},
date = {2021-06-22},
urldate = {2021-06-22},
journal = {IEEE Transactions on Aerospace and Electronic Systems},
abstract = {Generating realistic images has been a challenging problem in computer vision, with many researchers focusing on novel methods and datasets to produce benchmark results. Our motivation for the same arises from the dearth of real training images for recognizing targets in infrared imagery. We propose an encoder-decoder architecture for generating realistic medium wave infrared images of targets at various azimuth angles, in day or night conditions, and at different ranges. Specifically, we use a CNN-based siamese autoencoder network that modifies the latent space embedding of a given input view to produce a novel output view. First, we train this network with a limited set of real images of the targets, and show that it can generate new and previously unseen views of the same. We show that the network operates in the non-linear feature subspace and learns the underlying manifold to develop a semantic understanding of the targets. We use the structural similarity index measure (SSIM) to quantify how the generated and real images of targets compare. Finally, we show classifiers trained with the generated images are able to recognize targets in real test images.
Index Terms—ATR Classification, view prediction, deep convolutional autoencoders, infrared imagery},
keywords = {TAES},
pubstate = {published},
tppubtype = {article}
}
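SSIM, which the paper uses to quantify how the generated and real target images compare, is available off the shelf. A minimal check might look like the following, with random arrays standing in for real and generated infra-red chips.

import numpy as np
from skimage.metrics import structural_similarity

real = np.random.rand(64, 64).astype(np.float32)                        # stand-in real chip
generated = (real + 0.05 * np.random.randn(64, 64)).astype(np.float32)  # stand-in generated view

# data_range must be given for float images; identical images score 1.0.
score = structural_similarity(real, generated, data_range=1.0)
print(f"SSIM(real, generated) = {score:.3f}")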
Duarte, Kevin; Rawat, Yogesh Singh; Shah, Mubarak
PLM: Partial Label Masking for Imbalanced Multi-label Classification Workshop
IEEE Conference on Computer Vision and Pattern Recognition, Learning from Limited or Imperfect Data (L2ID) Workshop, 2021.
Tags: CVPRW, Imbalanced Dataset, Multi-Label | Links:
@workshop{Duarte2021,
title = {PLM: Partial Label Masking for Imbalanced Multi-label Classification},
author = {Kevin Duarte and Yogesh Singh Rawat and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/PLM.pdf},
year = {2021},
date = {2021-06-20},
urldate = {2021-06-20},
booktitle = {IEEE Conference on Computer Vision and Pattern Recognition, Learning from Limited or Imperfect Data (L2ID) Workshop},
keywords = {CVPRW, Imbalanced Dataset, Multi-Label},
pubstate = {published},
tppubtype = {workshop}
}
Kumar, Aakash; Kini, Jyoti; Shah, Mubarak; Mian, Ajmal
PC-DAN: Point Cloud based Deep Affinity Network for 3D Multi-Object Tracking Workshop
IEEE Conference on Computer Vision and Pattern Recognition, 2nd Workshop on Visual Perception for Navigation in Human Environments - The JackRabbot Social Grouping and Activity Dataset and Benchmark, 2021.
Tags: Contest, CVPRW, LIDAR, Tracking | Links:
@workshop{Kumar2021,
title = {PC-DAN: Point Cloud based Deep Affinity Network for 3D Multi-Object Tracking},
author = {Aakash Kumar and Jyoti Kini and Mubarak Shah and Ajmal Mian},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/PC-DAN.pdf},
year = {2021},
date = {2021-06-20},
urldate = {2021-06-20},
booktitle = {IEEE Conference on Computer Vision and Pattern Recognition, 2nd Workshop on Visual Perception for Navigation in Human Environments - The JackRabbot Social Grouping and Activity Dataset and Benchmark},
keywords = {Contest, CVPRW, LIDAR, Tracking},
pubstate = {published},
tppubtype = {workshop}
}
Gagne, Crystal; Kini, Jyoti; Smith, Daniel; Shah, Mubarak
Florida Wildlife Camera Trap Dataset Workshop
IEEE Conference on Computer Vision and Pattern Recognition, CV4Animals: Computer Vision for Animal Behavior Tracking and Modeling Workshop, 2021.
Tags: CVPRW, Dataset, Wildlife Preservation | Links:
@workshop{Gagne2021,
title = {Florida Wildlife Camera Trap Dataset},
author = {Crystal Gagne and Jyoti Kini and Daniel Smith and Mubarak Shah },
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Projects_Florida-Wildlife-Camera-Trap-Dataset.pdf
https://www.crcv.ucf.edu/research/projects/florida-wildlife-camera-trap-dataset/},
year = {2021},
date = {2021-06-20},
urldate = {2021-06-20},
booktitle = {IEEE Conference on Computer Vision and Pattern Recognition, CV4Animals: Computer Vision for Animal Behavior Tracking and Modeling Workshop},
keywords = {CVPRW, Dataset, Wildlife Preservation},
pubstate = {published},
tppubtype = {workshop}
}
Georgescu, Mariana Iuliana; Bărbălău, Antonio; Ionescu, Radu Tudor; Khan, Fahad Shahbaz; Popescu, Marius; Shah, Mubarak
Anomaly Detection in Video via Self-Supervised and Multi-Task Learning Conference
IEEE Conference on Computer Vision and Pattern Recognition, 2021.
Abstract | Tags: Anomaly Detection, Multi-Task Learning, Self-Supervised Learning | Links:
@conference{georgescu2020anomaly,
title = {Anomaly Detection in Video via Self-Supervised and Multi-Task Learning},
author = {Mariana Iuliana Georgescu and Antonio Bărbălău and Radu Tudor Ionescu and Fahad Shahbaz Khan and Marius Popescu and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Publications_Anomaly-Detection-in-Video-via-Self-Supervised-and-Multi-Task-Learning.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Publications_Anomaly-Detection-in-Video-via-Self-Supervised-and-Multi-Task-Learning_Supp.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Anomaly-Detection-in-Video-via-Self-Supervised-and-Multi-Task-Learning.pptx
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Anomaly-Detection-in-Video-via-Self-Supervised-and-Multi-Task-Learning.mp4},
year = {2021},
date = {2021-06-19},
urldate = {2021-06-19},
publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
abstract = {Anomaly detection in video is a challenging computer vision problem. Due to the lack of anomalous events at training time, anomaly detection requires the design of learning methods without full supervision. In this paper, we approach anomalous event detection in video through self-supervised and multi-task learning at the object level. We first utilize a pre-trained detector to detect objects. Then, we train a 3D convolutional neural network to produce discriminative anomaly-specific information by jointly learning multiple proxy tasks: three self-supervised and one based on knowledge distillation. The self-supervised tasks are: (i) discrimination of forward/backward moving objects (arrow of time), (ii) discrimination of objects in consecutive/intermittent frames (motion irregularity) and (iii) reconstruction of object-specific appearance information. The knowledge distillation task takes into account both classification and detection information, generating large prediction discrepancies between teacher and student models when anomalies occur. To the best of our knowledge, we are the first to approach anomalous event detection in video as a multi-task learning problem, integrating multiple self-supervised and knowledge distillation proxy tasks in a single architecture. Our lightweight architecture outperforms the state-of-the-art methods on three benchmarks: Avenue, ShanghaiTech and UCSD Ped2. Additionally, we perform an ablation study demonstrating the importance of integrating self-supervised learning and normality-specific distillation in a multi-task learning setting.},
keywords = {Anomaly Detection, Multi-Task Learning, Self-Supervised Learning},
pubstate = {published},
tppubtype = {conference}
}
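The first of the paper's self-supervised proxy tasks, discriminating forward from backward motion (arrow of time), reduces to a simple batch construction once object-centric clips are extracted. The sketch below shows that label construction under the assumption that the clips are already available; the 3D CNN that consumes the batch is omitted.

import torch

def arrow_of_time_batch(clips: torch.Tensor):
    """clips: (N, T, C, H, W) temporally ordered object-centric crops.

    Returns a doubled batch and labels (1 = forward, 0 = time-reversed)."""
    reversed_clips = torch.flip(clips, dims=[1])        # reverse the time axis
    batch = torch.cat([clips, reversed_clips], dim=0)
    labels = torch.cat([torch.ones(len(clips)), torch.zeros(len(clips))])
    return batch, labels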
Ashraf, Muhammad Waseem; Sultani, Waqas; Shah, Mubarak
Dogfight: Detecting Drones from Drones Videos Conference
IEEE Conference on Computer Vision and Pattern Recognition, 2021.
Tags: CVPR, Drone Video Analysis, Object Detection, UAV Video Analysis | Links:
@conference{Sultani2021,
title = {Dogfight: Detecting Drones from Drones Videos},
author = {Muhammad Waseem Ashraf and Waqas Sultani and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Dogfight.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Dogfight_Supp.mp4
https://github.com/mwaseema/Drone-Detection
http://im.itu.edu.pk/dogfight-detecting-drones-from-drones-videos/
https://docs.google.com/presentation/d/1huBSbYzyNUCs-gJHdSU2CZ0XfRlBWGOQ/edit#slide=id.p1},
year = {2021},
date = {2021-06-19},
urldate = {2021-06-19},
publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
keywords = {CVPR, Drone Video Analysis, Object Detection, UAV Video Analysis},
pubstate = {published},
tppubtype = {conference}
}
Tirupattur, Praveen; Duarte, Kevin; Rawat, Yogesh Singh; Shah, Mubarak
Modeling Multi-Label Action Dependencies for Temporal Action Localization Conference
IEEE Conference on Computer Vision and Pattern Recognition (Oral), 2021.
Tags: CVPR, Transformers | Links:
@conference{Tirupattur2021,
title = {Modeling Multi-Label Action Dependencies for Temporal Action Localization},
author = {Praveen Tirupattur and Kevin Duarte and Yogesh Singh Rawat and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Modeling-Multi-Label-Action-Dependencies-for-Temporal-Action-Localization.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Modeling-Multi-Label-Action-Dependencies-for-Temporal-Action-Localization_Supp.zip
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Modeling-Multi-Label-Action-Dependencies-for-Temporal-Action-Localization-CVPR-2021-ORAL.mp4},
year = {2021},
date = {2021-06-19},
urldate = {2021-06-19},
publisher = {IEEE Conference on Computer Vision and Pattern Recognition (Oral)},
keywords = {CVPR, Transformers},
pubstate = {published},
tppubtype = {conference}
}
Zaeemzadeh, Alireza; Bisagno, Niccolò; Sambugaro, Zeno; Conci, Nicola; Rahnavard, Nazanin; Shah, Mubarak
Out-of-Distribution Detection Using Union of 1-Dimensional Subspaces Conference
IEEE Conference on Computer Vision and Pattern Recognition, 2021.
Tags: CVPR, Open World, Out of Distribution (OOD) | Links:
@conference{Zaeemzadeh2021,
title = {Out-of-Distribution Detection Using Union of 1-Dimensional Subspaces},
author = {Alireza Zaeemzadeh and Niccolò Bisagno and Zeno Sambugaro and Nicola Conci and Nazanin Rahnavard and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Out-of-Distribution-Detection-Using-Union-of-1-Dimensional-Subspaces.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Out-of-Distribution-Detection-Using-Union-of-1-Dimensional-Subspaces_Supp.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/CVPR21_presentation_video.mov},
year = {2021},
date = {2021-06-19},
urldate = {2021-06-19},
publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
keywords = {CVPR, Open World, Out of Distribution (OOD)},
pubstate = {published},
tppubtype = {conference}
}
Khan, Aisha Urooj; Kuehne, Hilde; Duarte, Kevin; Gan, Chuang; Lobo, Niels Da Vitoria; Shah, Mubarak
Found a Reason for me? Weakly-supervised Grounded Visual Question Answering using Capsules Conference
IEEE Conference on Computer Vision and Pattern Recognition, 2021.
Tags: Capsule Networks, CVPR, Grounding, VQA | Links:
@conference{Khan2021b,
title = {Found a Reason for me? Weakly-supervised Grounded Visual Question Answering using Capsules},
author = {Aisha Urooj Khan and Hilde Kuehne and Kevin Duarte and Chuang Gan and Niels Da Vitoria Lobo and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Found-a-Reason-for-me.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Found-a-Reason-for-me_Supp.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/cvpr21_poster_v2.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/cvpr_2021_5min.mp4},
year = {2021},
date = {2021-06-19},
urldate = {2021-06-19},
publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
keywords = {Capsule Networks, CVPR, Grounding, VQA},
pubstate = {published},
tppubtype = {conference}
}
Rizve, Mamshad Nayeem; Khan, Salman; Khan, Fahad Shahbaz; Shah, Mubarak
Exploring Complementary Strengths of Invariant and Equivariant Representations for Few-Shot Learning Conference
IEEE Conference on Computer Vision and Pattern Recognition, 2021.
Tags: CVPR, Equivariance, Few-Shot Learning, Invariance | Links:
@conference{Rizve2021b,
title = {Exploring Complementary Strengths of Invariant and Equivariant Representations for Few-Shot Learning},
author = {Mamshad Nayeem Rizve and Salman Khan and Fahad Shahbaz Khan and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Exploring-Complementary-Strengths-of-Invariant-and-Equivariant.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Exploring-Complementary-Strengths-of-Invariant-and-Equivariant_Supp.pdf
https://github.com/nayeemrizve/invariance-equivariance
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/FSL_CVPR2021_Video_Final.mp4},
year = {2021},
date = {2021-06-19},
urldate = {2021-06-19},
publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
keywords = {CVPR, Equivariance, Few-Shot Learning, Invariance},
pubstate = {published},
tppubtype = {conference}
}
Rizve, Mamshad Nayeem; Duarte, Kevin; Rawat, Yogesh Singh; Shah, Mubarak
In Defense of Pseudo-Labeling: An Uncertainty-Aware Pseudo-label Selection Framework for Semi-Supervised Learning Conference
Ninth International Conference on Learning Representations (ICLR), 2021.
Tags: ICLR, Network Calibration, Pseudo-Labeling, Semi-supervised learning | Links:
@conference{Rizve2021,
title = {In Defense of Pseudo-Labeling: An Uncertainty-Aware Pseudo-label Selection Framework for Semi-Supervised Learning},
author = {Mamshad Nayeem Rizve and Kevin Duarte and Yogesh Singh Rawat and Mubarak Shah },
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/In-Defense-Of-Pseudo-Labeling.pdf
https://github.com/nayeemrizve/ups
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/UPS_ICLR2021_Slides.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/UPS_ICLR2021_Slides.pptx
https://www.crcv.ucf.edu/wp-content/uploads/2018/11/UPS_Poster_ICLR2021.png},
year = {2021},
date = {2021-05-04},
urldate = {2021-05-04},
booktitle = {Ninth International Conference on Learning Representations (ICLR)},
keywords = {ICLR, Network Calibration, Pseudo-Labeling, Semi-supervised learning},
pubstate = {published},
tppubtype = {conference}
}
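The selection step that gives the UPS framework its name can be sketched as two thresholds: a pseudo-label is kept only when the prediction is both confident and low-uncertainty. In the sketch below, uncertainty is approximated by the standard deviation over stochastic forward passes, an illustrative choice; the threshold names tau_p and kappa_p are likewise assumptions.

import torch

@torch.no_grad()
def select_pseudo_labels(probs_mc: torch.Tensor, tau_p: float = 0.7, kappa_p: float = 0.05):
    """probs_mc: (M, N, K) softmax outputs over M stochastic forward passes."""
    mean_probs = probs_mc.mean(dim=0)                   # (N, K) averaged predictions
    conf, labels = mean_probs.max(dim=1)                # confidence and hard label
    # Uncertainty of the winning class across the M passes.
    uncert = probs_mc.std(dim=0).gather(1, labels.unsqueeze(1)).squeeze(1)
    keep = (conf > tau_p) & (uncert < kappa_p)          # confident AND certain
    return keep.nonzero(as_tuple=True)[0], labels[keep]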
Sultani, Waqas; Shah, Mubarak
Human Action Recognition in Drone Videos using a Few Aerial Training Examples Journal Article
In: Computer Vision and Image Understanding, vol. 206, no. 103186, 2021.
Tags: CVIU, Drone Video Analysis, Human Action Recognition, Multi-Task Learning, UAV Video Analysis | Links:
@article{Sultani2020,
title = {Human Action Recognition in Drone Videos using a Few Aerial Training Examples},
author = {Waqas Sultani and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/1910.10027.pdf
https://www.crcv.ucf.edu/research/projects/human-action-recognition-in-drone-videos-using-a-few-aerial-training-examples/},
year = {2021},
date = {2021-05-01},
urldate = {2021-05-01},
journal = {Computer Vision and Image Understanding},
volume = {206},
number = {103186},
keywords = {CVIU, Drone Video Analysis, Human Action Recognition, Multi-Task Learning, UAV Video Analysis},
pubstate = {published},
tppubtype = {article}
}
Zaeemzadeh, Alireza
Robust and Scalable Data Representation and Analysis Leveraging Isometric Transformations and Sparsity PhD Thesis
University of Central Florida, 2021.
Tags: Ph.D. Dissertation | Links:
@phdthesis{Zaeemzadeh2021b,
title = {Robust and Scalable Data Representation and Analysis Leveraging Isometric Transformations and Sparsity},
author = {Alireza Zaeemzadeh},
url = {https://www.cecs.ucf.edu/graddefense-old/pdf/13258},
year = {2021},
date = {2021-05-01},
urldate = {2021-05-01},
school = {University of Central Florida},
keywords = {Ph.D. Dissertation},
pubstate = {published},
tppubtype = {phdthesis}
}
Georgescu, Mariana-Iuliana; Ionescu, Radu Tudor; Khan, Fahad Shahbaz; Popescu, Marius; Shah, Mubarak
A Background-Agnostic Framework with Adversarial Training for Abnormal Event Detection in Video Journal Article
In: IEEE Transactions on Pattern Analysis and Machine Intelligence, 2021.
Tags: Abnormal Event Detection, Anomaly Detection | Links:
@article{Georgescu2021,
title = {A Background-Agnostic Framework with Adversarial Training for Abnormal Event Detection in Video},
author = {Mariana-Iuliana Georgescu and Radu Tudor Ionescu and Fahad Shahbaz Khan and Marius Popescu and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/AED_PAMI-revised-arxiv.pdf},
year = {2021},
date = {2021-04-16},
urldate = {2021-04-16},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
keywords = {Abnormal Event Detection, Anomaly Detection},
pubstate = {published},
tppubtype = {article}
}
Khan, Salman; Naseer, Muzammal; Hayat, Munawar; Zamir, Syed Waqas; Khan, Fahad Shahbaz; Shah, Mubarak
Transformers in Vision: A Survey Technical Report
no. arXiv:2101.01169, 2021.
Tags: Survey, Transformers | Links:
@techreport{Khan2021,
title = {Transformers in Vision: A Survey},
author = {Salman Khan and Muzammal Naseer and Munawar Hayat and Syed Waqas Zamir and Fahad Shahbaz Khan and Mubarak Shah },
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Publications_TransformersSurvey.pdf},
year = {2021},
date = {2021-02-02},
urldate = {2021-02-02},
number = {arXiv:2101.01169},
keywords = {Survey, Transformers},
pubstate = {published},
tppubtype = {techreport}
}
Rana, Aayush; Rawat, Yogesh Singh
We don’t Need Thousand Proposals: Single Shot Actor-Action Detection in Videos Conference
IEEE 2021 Winter Conference on Applications of Computer Vision (WACV), 2021.
Tags: WACV | Links:
@conference{Rana2021,
title = {We don’t Need Thousand Proposals: Single Shot Actor-Action Detection in Videos},
author = {Aayush Rana and Yogesh Singh Rawat},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2020/12/Projects_Single-shot-actor-action-detection-in-videos.pdf
https://www.crcv.ucf.edu/research/projects/we-dont-need-thousand-proposals-single-shot-actor-action-detection-in-videos/
https://youtu.be/GHKpr5VTbV8},
year = {2021},
date = {2021-01-05},
urldate = {2021-01-05},
booktitle = {IEEE 2021 Winter Conference on Applications of Computer Vision (WACV)},
keywords = {WACV},
pubstate = {published},
tppubtype = {conference}
}
Zheng, Ce; Wu, Wenhan; Yang, Taojiannan; Zhu, Sijie; Chen, Chen; Liu, Ruixu; Shen, Ju; Kehtarnavaz, Nasser; Shah, Mubarak
Deep Learning-Based Human Pose Estimation: A Survey Technical Report
no. arXiv:2012.13392, 2021.
Tags: Human Pose Estimation | Links:
@techreport{Zheng2021,
title = {Deep Learning-Based Human Pose Estimation: A Survey},
author = {Ce Zheng and Wenhan Wu and Taojiannan Yang and Sijie Zhu and Chen Chen and Ruixu Liu and Ju Shen and Nasser Kehtarnavaz and Mubarak Shah},
url = {https://arxiv.org/pdf/2012.13392.pdf
https://github.com/zczcwh/DL-HPE},
year = {2021},
date = {2021-01-02},
number = {arXiv:2012.13392},
keywords = {Human Pose Estimation},
pubstate = {published},
tppubtype = {techreport}
}
2020
Khan, Aisha Urooj; Mazaheri, Amir; da Vitoria Lobo, Niels; Shah, Mubarak
MMFT-BERT: Multimodal Fusion Transformer with BERT Encodings for Visual Question Answering Conference
Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: Findings, EMNLP, 2020.
Tags: EMNLP, Visual Question Answering | Links:
@conference{Khan2020b,
title = {MMFT-BERT: Multimodal Fusion Transformer with BERT Encodings for Visual Question Answering},
author = {Aisha Urooj Khan and Amir Mazaheri and Niels da Vitoria Lobo and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Publications_MMFT-BERT.pdf},
year = {2020},
date = {2020-11-16},
booktitle = {Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: Findings, EMNLP},
keywords = {EMNLP, Visual Question Answering},
pubstate = {published},
tppubtype = {conference}
}
Georgescu, Mariana-Iuliana; Barbalau, Antonio; Ionescu, Radu Tudor; Khan, Fahad Shahbaz; Popescu, Marius; Shah, Mubarak
Anomaly Detection in Video via Self-Supervised and Multi-Task Learning Technical Report
no. arXiv:2011.07491, 2020.
Tags: Anomaly Detection, CVPR-2021, Multi-Task Learning, Self-Supervised Learning | Links:
@techreport{Georgescu2020b,
title = {Anomaly Detection in Video via Self-Supervised and Multi-Task Learning},
author = {Mariana-Iuliana Georgescu and Antonio Barbalau and Radu Tudor Ionescu and Fahad Shahbaz Khan and Marius Popescu and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Publicatons_Anomaly-Detection-in-Video-via-Self-Supervised-and-Multi-Task-Learning.pdf},
year = {2020},
date = {2020-11-15},
urldate = {2020-11-15},
number = {arXiv:2011.07491},
howpublished = {CVPR-2021},
keywords = {Anomaly Detection, CVPR-2021, Multi-Task Learning, Self-Supervised Learning},
pubstate = {published},
tppubtype = {techreport}
}
McIntosh, Bruce; Venkataramanan, Shashanka; Mahalanobis, Abhijit
Target Detection in Cluttered Environments Using Infra-Red Images Conference
IEEE International Conference on Image Processing, 2020.
Abstract | Tags: ICIP | Links:
@conference{McIntosh2020,
title = {Target Detection in Cluttered Environments Using Infra-Red Images},
author = {Bruce McIntosh and Shashanka Venkataramanan and Abhijit Mahalanobis},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2020/07/icip2020_photoready.pdf},
year = {2020},
date = {2020-10-28},
booktitle = {IEEE International Conference on Image Processing},
abstract = {The detection of targets in infra-red imagery is a challenging problem which involves locating small targets in heavily cluttered environments while maintaining a low false alarm rate. We propose a network that optimizes a “target to clutter ratio” (TCR) metric defined as the ratio of the output energies produced by the network in response to targets and clutter. We show that for target detection, it is advantageous to analytically derive the first layer of a CNN to maximize the TCR metric, and then train the rest of the network to optimize the same cost function. We evaluate the performance of the resulting network using a public domain MWIR data set released by the US Army’s Night Vision Laboratories, and compare it to the state-of-the-art detectors such as Faster RCNN and Yolo-v3. Referred to as the TCRNet, the proposed network demonstrates state of the art results with greater than 30% improvement in probability of detection while reducing the false alarm rate by more than a factor of 2 when compared to these leading methods. Ablation studies also show that the proposed approach and metric are superior to learning the entire network from scratch, or using conventional regression metrics such as the mean square error (MSE).},
keywords = {ICIP},
pubstate = {published},
tppubtype = {conference}
}
Georgescu, Mariana-Iuliana; Ionescu, Radu Tudor; Khan, Fahad Shahbaz; Popescu, Marius; Shah, Mubarak
A Scene-Agnostic Framework with Adversarial Training for Abnormal Event Detection in Video Technical Report
no. arXiv:2008.12328, 2020.
Tags: Anomaly Detection | Links:
@techreport{Georgescu2020,
title = {A Scene-Agnostic Framework with Adversarial Training for Abnormal Event Detection in Video},
author = {Mariana-Iuliana Georgescu and Radu Tudor Ionescu and Fahad Shahbaz Khan and Marius Popescu and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2020/08/Publications_A-Scene-Agnostic-Framework-with-Adversarial-Training-for-Abnormal-Event-Detection-in-Video.pdf},
year = {2020},
date = {2020-08-27},
number = {arXiv:2008.12328},
keywords = {Anomaly Detection},
pubstate = {published},
tppubtype = {techreport}
}
Sun, ShiJie; Akhtar, Naveed; Song, XiangYu; Song, HuanSheng; Mian, Ajmal; Shah, Mubarak
Simultaneous Detection and Tracking with Motion Modelling for Multiple Object Tracking Conference
16th European Conference on Computer Vision, 2020.
Abstract | Tags: ECCV, Tracking | Links:
@conference{Sun2020,
title = {Simultaneous Detection and Tracking with Motion Modelling for Multiple Object Tracking},
author = {ShiJie Sun and Naveed Akhtar and XiangYu Song and HuanSheng Song and Ajmal Mian and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2020/07/Publications_Simultaneous-Detection-and-Tracking-with-Motion-Modelling-for-Multiple-Object-Tracking.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2020/07/Publications_Simultaneous-Detection-and-Tracking-with-Motion-Modelling-for-Multiple-Object-Tracking_Supp.pdf
https://shijies.github.io/DMMN_Page/},
year = {2020},
date = {2020-08-23},
booktitle = {16th European Conference on Computer Vision},
abstract = {Deep learning based Multiple Object Tracking (MOT) currently relies on off-the-shelf detectors for tracking-by-detection. This results in deep models that are detector biased and evaluations that are detector influenced. To resolve this issue, we introduce Deep Motion Modeling Network (DMM-Net) that can estimate multiple objects’ motion parameters to perform joint detection and association in an end-to-end manner. DMM-Net models object features over multiple frames and simultaneously infers object classes, visibility and their motion parameters. These outputs are readily used to update the tracklets for efficient MOT. DMM-Net achieves PR-MOTA score of 12.80 @ 120+ fps for the popular UA-DETRAC challenge - which is better performance and orders of magnitude faster. We also contribute a synthetic large-scale public dataset Omni-MOT for vehicle tracking that provides precise ground-truth annotations to eliminate the detector influence in MOT evaluation. This 14M+ frames dataset is extendable with our public script (Code at Dataset, Dataset Recorder, Omni-MOT Source). We demonstrate the suitability of Omni-MOT for deep learning with DMM-Net, and also make the source code of our network public.},
keywords = {ECCV, Tracking},
pubstate = {published},
tppubtype = {conference}
}
Xie, Jin; Cholakkal, Hisham; Anwer, Rao Muhammad; Khan, Fahad Shahbaz; Pang, Yanwei; Shao, Ling; Shah, Mubarak
Count- and Similarity-aware R-CNN for Pedestrian Detection Conference
16th European Conference on Computer Vision, 2020.
Abstract | Tags: Detection, ECCV | Links:
@conference{Xie2020,
title = {Count- and Similarity-aware R-CNN for Pedestrian Detection},
author = {Jin Xie and Hisham Cholakkal and Rao Muhammad Anwer and Fahad Shahbaz Khan and Yanwei Pang and Ling Shao and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2020/07/Publications_Count-and-Similarity-aware-R-CNN-for-Pedestrian-Detection.pdf},
year = {2020},
date = {2020-08-23},
booktitle = {16th European Conference on Computer Vision},
abstract = {Recent pedestrian detection methods generally rely on additional supervision, such as visible bounding-box annotations, to handle heavy occlusions. We propose an approach that leverages pedestrian count and proposal similarity information within a two-stage pedestrian detection framework. Both pedestrian count and proposal similarity are derived from standard full-body annotations commonly used to train pedestrian detectors. We introduce a count-weighted detection loss function that assigns higher weights to the detection errors occurring at highly overlapping pedestrians. The proposed loss function is utilized at both stages of the two-stage detector. We further introduce a count-andsimilarity branch within the two-stage detection framework, which predicts pedestrian count as well as proposal similarity to identify distinct proposals. Our approach requires neither part information nor visible bounding-box annotations. Experiments are performed on the CityPersons and CrowdHuman datasets. Our method sets a new state-of-the-art on both datasets. Further, it achieves an absolute gain of 2.4% over the current state-of-the-art, in terms of log-average miss rate, on the heavily occluded (HO) set of CityPersons test set, without using additional visible bounding-box supervision. Finally, we demonstrate the applicability of our approach for the problem of human instance segmentation. Code and models are available at: https://github.com/Leotju/CaSe.},
keywords = {Detection, ECCV},
pubstate = {published},
tppubtype = {conference}
}
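The count-weighted detection loss described in the abstract assigns higher weights to errors at highly overlapping pedestrians. A sketch of that weighting follows; the linear form and the alpha parameter are illustrative assumptions, not the paper's exact function.

import torch

def count_weighted_loss(per_sample_loss: torch.Tensor, overlap_counts: torch.Tensor,
                        alpha: float = 0.5) -> torch.Tensor:
    """per_sample_loss: (N,) detection losses.
    overlap_counts: (N,) ground-truth pedestrians overlapping each sample."""
    weights = 1.0 + alpha * overlap_counts.float()      # more overlap, larger weight
    return (weights * per_sample_loss).sum() / weights.sum()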
Schatz, Kara Marie; Quintanilla, Erik; Vyas, Shruti; Rawat, Yogesh Singh
A Recurrent Transformer Network for Novel View Action Synthesis Conference
16th European Conference on Computer Vision, 2020.
Abstract | Tags: Cross-View, ECCV, REU, View Action Synthesis, View Synthesis | Links:
@conference{Schatz2020,
title = {A Recurrent Transformer Network for Novel View Action Synthesis},
author = {Kara Marie Schatz and Erik Quintanilla and Shruti Vyas and Yogesh Singh Rawat},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2020/07/Projects_A-Recurrent-Transformer-Network-for-Novel-View-Action-Synthesis.pdf
https://www.crcv.ucf.edu/research/projects/a-recurrent-transformer-network-for-novel-view-action-synthesis/},
year = {2020},
date = {2020-08-23},
urldate = {2020-08-23},
booktitle = {16th European Conference on Computer Vision},
abstract = {In this work, we address the problem of synthesizing human actions from novel views. Given an input video of an actor performing some action, we aim to synthesize a video with the same action performed from a novel view with the help of an appearance prior. We propose an end-to-end deep network to solve this problem. The proposed network utilizes the change in viewpoint to transform the action from the input view to the novel view in feature space. The transformed action is integrated with the target appearance using the proposed recurrent transformer network, which provides a transformed appearance for each time-step in the action sequence. The recurrent transformer network utilize action key-points which are determined in an unsupervised approach using the encoded action features. We also propose a hierarchical structure for the recurrent transformation which further improves the performance. We demonstrate the effectiveness of the proposed method through extensive experiments conducted on a large-scale multi-view action recognition NTU-RGB+D dataset. In addition, we show that the proposed method can transform the action to a novel viewpoint with an entirely different scene or actor. The code is publicly available at https://github.com/schatzkara/cross-view-video.},
keywords = {Cross-View, ECCV, REU, View Action Synthesis, View Synthesis},
pubstate = {published},
tppubtype = {conference}
}
Vyas, Shruti; Rawat, Yogesh Singh; Shah, Mubarak
Multi-view Action Recognition using Cross-view Video Prediction Conference
16th European Conference on Computer Vision, 2020.
Abstract | Tags: Cross-View Video Prediction, ECCV | Links:
@conference{Vyas2020,
title = {Multi-view Action Recognition using Cross-view Video Prediction},
author = {Shruti Vyas and Yogesh Singh Rawat and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2020/07/Projects_Multi-view-Action-Recognition-using-Cross-view-Video-Prediction.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2020/07/Projects_Multi-view-Action-Recognition-using-Cross-view-Video-Prediction_Supp.pdf
https://www.crcv.ucf.edu/research/projects/multi-view-action-recognition-using-cross-view-video-prediction/},
year = {2020},
date = {2020-08-23},
booktitle = {16th European Conference on Computer Vision},
abstract = {In this work, we address the problem of action recognition in a multi-view environment. Most of the existing approaches utilize pose information for multi-view action recognition. We focus on RGB modality instead and propose an unsupervised representation learning framework, which encodes the scene dynamics in videos captured from multiple viewpoints via predicting actions from unseen views. The framework takes multiple short video clips from different viewpoints and time as input and learns an holistic internal representation which is used to predict a video clip from an unseen viewpoint and time. The ability of the proposed network to render unseen video frames enables it to learn a meaningful and robust representation of the scene dynamics. We evaluate the effectiveness of the learned representation for multiview video action recognition in a supervised approach. We observe a significant improvement in the performance with RGB modality on NTU-RGB+D dataset, which is the largest dataset for multi-view action recognition. The proposed framework also achieves state-of-the-art results with depth modality, which validates the generalization capability of the approach to other data modalities. The code is publicly available at https://github.com/svyas23/cross-view-action.},
keywords = {Cross-View Video Prediction, ECCV},
pubstate = {published},
tppubtype = {conference}
}
Venkataramanan, Shashanka; Peng, Kuan-Chuan; Singh, Rajat Vikram; Mahalanobis, Abhijit
Attention Guided Anomaly Localization in Images Conference
16th European Conference on Computer Vision, 2020.
Abstract | Tags: ECCV | Links:
@conference{Venkataramanan2020,
title = {Attention Guided Anomaly Localization in Images},
author = {Shashanka Venkataramanan and Kuan-Chuan Peng and Rajat Vikram Singh and Abhijit Mahalanobis},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2020/07/2813.pdf
https://youtu.be/b-EQr-fGPWo},
year = {2020},
date = {2020-08-23},
booktitle = {16th European Conference on Computer Vision},
abstract = {Anomaly localization is an important problem in computer vision which involves localizing anomalous regions within images with applications in industrial inspection, surveillance, and medical imaging. This task is challenging due to the small sample size and pixel coverage of the anomaly in real-world scenarios. Most prior works need to use anomalous training images to compute a class-specific threshold to localize anomalies. Without the need of anomalous training images, we propose Convolutional Adversarial Variational autoencoder with Guided Attention (CAVGA), which localizes the anomaly with a convolutional latent variable to preserve the spatial information. In the unsupervised setting, we propose an attention expansion loss where we encourage CAVGA to focus on all normal regions in the image. Furthermore, in the weakly supervised setting we propose a complementary guided attention loss, where we encourage the attention map to focus on all normal regions while minimizing the attention map corresponding to anomalous regions in the image. CAVGA outperforms the state-of-the-art (SOTA) anomaly localization methods on MVTec Anomaly Detection (MVTAD), modified ShanghaiTech Campus (mSTC) and Large-scale Attention based Glaucoma (LAG) datasets in the unsupervised setting and when using only 2% anomalous images in the weakly-supervised setting. CAVGA also outperforms SOTA anomaly detection methods on the MNIST, CIFAR-10, Fashion-MNIST, MVTAD, mSTC and LAG datasets.},
keywords = {ECCV},
pubstate = {published},
tppubtype = {conference}
}
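The attention expansion loss used by CAVGA in the unsupervised setting has a particularly compact form: on normal training images, the attention map is pushed toward covering the whole image. A minimal sketch, assuming attention maps normalized to [0, 1]:

import torch

def attention_expansion_loss(attn: torch.Tensor) -> torch.Tensor:
    """attn: (N, 1, H, W) attention maps in [0, 1] computed on normal images.

    Minimizing the mean of (1 - attention) encourages attending to all
    normal regions, so anomalous regions later stand out as unattended."""
    return (1.0 - attn).mean()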
LaLonde, Rodney
Algorithms and Applications of Novel Capsule Networks PhD Thesis
University of Central Florida, 2020.
Tags: Ph.D. Dissertation | Links:
@phdthesis{LaLonde2020b,
title = {Algorithms and Applications of Novel Capsule Networks},
author = {Rodney LaLonde},
url = {https://stars.library.ucf.edu/etd2020/612/},
year = {2020},
date = {2020-08-02},
urldate = {2020-08-02},
school = {University of Central Florida},
keywords = {Ph.D. Dissertation},
pubstate = {published},
tppubtype = {phdthesis}
}
RaviPrakash, Harish
Novel Computational Approaches for Multidimensional Brain Image Analysis PhD Thesis
University of Central Florida, 2020.
Tags: Ph.D. Dissertation | Links:
@phdthesis{RaviPrakash2020,
title = {Novel Computational Approaches for Multidimensional Brain Image Analysis},
author = {Harish RaviPrakash},
url = {https://stars.library.ucf.edu/etd2020/618/},
year = {2020},
date = {2020-08-01},
urldate = {2020-08-01},
school = {University of Central Florida},
keywords = {Ph.D. Dissertation},
pubstate = {published},
tppubtype = {phdthesis}
}
Zhang, Xiaoyu; Mian, Ajmal; Gupta, Rohit; Rahnavard, Nazanin; Shah, Mubarak
Cassandra: Detecting Trojaned Networks from Adversarial Perturbations Technical Report
no. arXiv:2007.14433, 2020.
Tags: Adversarial Attacks | Links:
@techreport{Zhang2020,
title = {Cassandra: Detecting Trojaned Networks from Adversarial Perturbations},
author = {Xiaoyu Zhang and Ajmal Mian and Rohit Gupta and Nazanin Rahnavard and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2020/08/Publications_Cassandra-Detecting-Trojaned-Networks-from-Adversarial-Perturbations.pdf},
year = {2020},
date = {2020-07-28},
urldate = {2020-07-28},
number = {arXiv:2007.14433},
keywords = {Adversarial Attacks},
pubstate = {published},
tppubtype = {techreport}
}
Edraki, Marzieh; Karim, Nazmul; Rahnavard, Nazanin; Mian, Ajmal; Shah, Mubarak
Odyssey: Creation, Analysis and Detection of Trojan Models Technical Report
no. arXiv:2007.08142, 2020.
Tags: Adversarial Attacks | Links:
@techreport{Edraki2020,
title = {Odyssey: Creation, Analysis and Detection of Trojan Models},
author = {Marzieh Edraki and Nazmul Karim and Nazanin Rahnavard and Ajmal Mian and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2020/08/Publications_Odyssey-Creation-Analysis-and-Detection-of-Trojan-Models.pdf
https://www.crcv.ucf.edu/research/projects/odyssey-creation-analysis-and-detection-of-trojan-models/},
year = {2020},
date = {2020-07-16},
number = {arXiv:2007.08142},
keywords = {Adversarial Attacks},
pubstate = {published},
tppubtype = {techreport}
}
Demir, Ugur; Rawat, Yogesh Singh; Shah, Mubarak
TinyVIRAT: Low-resolution Video Action Recognition Technical Report
no. arXiv:2007.07355, 2020.
Tags: Video Action Recognition | Links:
@techreport{Demir2020,
title = {TinyVIRAT: Low-resolution Video Action Recognition},
author = {Ugur Demir and Yogesh Singh Rawat and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2020/07/Publications_TinyVIRAT.pdf
https://www.crcv.ucf.edu/research/projects/tinyvirat-low-resolution-video-action-recognition/},
year = {2020},
date = {2020-07-14},
number = {arXiv:2007.07355},
keywords = {Video Action Recognition},
pubstate = {published},
tppubtype = {techreport}
}
Joneidi, Mohsen; Vahidian, Saeed; Esmaeili, Ashkan; Wang, Weijia; Rahnavard, Nazanin; Lin, Bill; Shah, Mubarak
Select to Better Learn: Fast and Accurate Deep Learning using Data Selection from Nonlinear Manifolds Conference
IEEE Conference on Computer Vision and Pattern Recognition, 2020.
Abstract | Tags: CVPR | Links:
@conference{Jonediei2020,
title = {Select to Better Learn: Fast and Accurate Deep Learning using Data Selection from Nonlinear Manifolds},
author = {Mohsen Joneidi and Saeed Vahidian and Ashkan Esmaeili and Weijia Wang and Nazanin Rahnavard and Bill Lin and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2020/04/Select-to-Better-Learn.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2020/04/Select-to-Better-Learn_Supplementary.pdf},
year = {2020},
date = {2020-06-14},
publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
abstract = {Finding a small subset of data whose linear combination spans other data points, also called column subset selection problem (CSSP), is an important open problem in computer science with many applications in computer vision and deep learning such as the ones shown in Fig. 1. There are some studies that solve CSSP in a polynomial time complexity w.r.t. the size of the original dataset. A simple and efficient selection algorithm with a linear complexity order, referred to as spectrum pursuit (SP), is proposed that pursuits spectral components of the dataset using available sample points. The proposed non-greedy algorithm aims to iteratively find K data samples whose span is close to that of the first K spectral components of entire data. SP has no parameter to be fine tuned and this desirable property makes it problem-independent. The simplicity of SP enables us to extend the underlying linear model to more complex models such as nonlinear manifolds and graph-based models. The nonlinear extension of SP is introduced as kernel-SP (KSP). The superiority of the proposed algorithms is demonstrated in a wide range of applications.},
keywords = {CVPR},
pubstate = {published},
tppubtype = {conference}
}
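Spectrum pursuit, as the abstract describes it, iteratively finds K samples whose span approximates that of the first K spectral components of the data. The sketch below is a deliberately simplified one-shot variant for illustration (the actual SP algorithm refines the selection iteratively): each of the top-K right-singular directions is matched to its most aligned sample.

import numpy as np

def select_representatives(X: np.ndarray, k: int) -> np.ndarray:
    """Pick k rows of X whose span roughly tracks the top-k spectral components."""
    Xn = X / (np.linalg.norm(X, axis=1, keepdims=True) + 1e-12)
    _, _, vt = np.linalg.svd(X, full_matrices=False)   # rows of vt = spectral components
    chosen = []
    for i in range(k):
        scores = np.abs(Xn @ vt[i])                    # alignment with component i
        scores[chosen] = -np.inf                       # forbid duplicates
        chosen.append(int(np.argmax(scores)))
    return np.array(chosen)

X = np.random.randn(500, 32)                            # stand-in dataset
print(select_representatives(X, k=5))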
McIntosh, Bruce; Duarte, Kevin; Rawat, Yogesh Singh; Shah, Mubarak
Visual-textual Capsule Routing for Text-based Video Segmentation Conference
IEEE Conference on Computer Vision and Pattern Recognition (Oral), 2020.
Abstract | Tags: Capsule Networks, CVPR, Video Object Segmentation | Links:
@conference{Duarte2020,
title = {Visual-textual Capsule Routing for Text-based Video Segmentation},
author = {Bruce McIntosh and Kevin Duarte and Yogesh Singh Rawat and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2020/06/Projects_Visual-textual-Capsule-Routing-for-Text-based-Video-Segmentation.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2020/06/Projects_Visual-textual-Capsule-Routing-for-Text-based-Video-Segmentation_Supplementary.zip
https://www.crcv.ucf.edu/research/projects/visual-textual-capsule-routing-for-text-based-video-segmentation/},
year = {2020},
date = {2020-06-14},
publisher = {IEEE Conference on Computer Vision and Pattern Recognition (Oral)},
edition = {(Oral)},
abstract = {Joint understanding of vision and natural language is a challenging problem with a wide range of applications in artificial intelligence. In this work, we focus on integration of video and text for the task of actor and action video segmentation from a sentence. We propose a capsule-based approach which performs pixel-level localization based on a natural language query describing the actor of interest. We encode both the video and textual input in the form of capsules, which provide a more effective representation in comparison with standard convolution based features. Our novel visual-textual routing mechanism allows for the fusion of video and text capsules to successfully localize the actor and action. The existing works on actor-action localization are mainly focused on localization in a single frame instead of the full video. Different from existing works, we propose to perform the localization on all frames of the video. To validate the potential of the proposed network for actor and action video localization, we extend an existing actor-action dataset (A2D) with annotations for all the frames. The experimental evaluation demonstrates the effectiveness of our capsule network for text selective actor and action localization in videos. The proposed method also improves upon the performance of the existing state-of-the art works on single frame-based localization. },
keywords = {Capsule Networks, CVPR, Video Object Segmentation},
pubstate = {published},
tppubtype = {conference}
}
Rajasegaran, Jathushan; Khan, Salman; Hayat, Munawar; Khan, Fahad Shahbaz; Shah, Mubarak
iTAML : An Incremental Task-Agnostic Meta-learning Approach Conference
IEEE Conference on Computer Vision and Pattern Recognition, 2020.
Abstract | Tags: CVPR, Meta-Learning | Links:
@conference{Khan2020,
title = {iTAML : An Incremental Task-Agnostic Meta-learning Approach},
author = {Jathushan Rajasegaran and Salman Khan and Munawar Hayat and Fahad Shahbaz Khan and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2020/04/iTAML.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2020/04/iTAML-Supplementary.pdf},
year = {2020},
date = {2020-06-14},
publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
abstract = {Humans can continuously learn new knowledge as their experience grows. In contrast, previous learning in deep neural networks can quickly fade out when they are trained on a new task. In this paper, we hypothesize this problem can be avoided by learning a set of generalized parameters, that are neither specific to old nor new tasks. In this pursuit, we introduce a novel meta-learning approach that seeks to maintain an equilibrium between all the encountered tasks. This is ensured by a new meta-update rule which avoids catastrophic forgetting. In comparison to previous metalearning techniques, our approach is task-agnostic. When presented with a continuum of data, our model automatically identifies the task and quickly adapts to it with just a single update. We perform extensive experiments on five datasets in a class-incremental setting, leading to significant improvements over the state of the art methods (e.g., a 21.3% boost on CIFAR100 with 10 incremental tasks). Specifically, on large-scale datasets that generally prove difficult cases for incremental learning, our approach delivers absolute gains as high as 19.1% and 7.4% on ImageNet and MS-Celeb datasets, respectively. Our codes are available at: https://github.com/brjathu/iTAML. },
keywords = {CVPR, Meta-Learning},
pubstate = {published},
tppubtype = {conference}
}
Palazzo, Simone; Spampinato, Concetto; Kavasidis, Isaak; Giordano, Daniela; Schmidt, Joseph; Shah, Mubarak
Decoding Brain Representations by Multimodal Learning of Neural Activity and Visual Features Journal Article
In: IEEE Transactions on Pattern Analysis and Machine Intelligence, pp. 1 - 1, 2020.
Tags: EEG/Brain, Medical | Links:
@article{Palazzo2020,
title = {Decoding Brain Representations by Multimodal Learning of Neural Activity and Visual Features},
author = {Simone Palazzo and Concetto Spampinato and Isaak Kavasidis and Daniela Giordano and Joseph Schmidt and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2020/06/Publications_Decoding-Brain-Representations-by-Multimodal-Learning-of-Neural-Activity-and-Visual-Features.pdf},
year = {2020},
date = {2020-05-20},
urldate = {2020-05-20},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
pages = {1 - 1},
keywords = {EEG/Brain, Medical},
pubstate = {published},
tppubtype = {article}
}
Sharghi, Aidean; da Vitoria Lobo, Niels; Shah, Mubarak
Text Synopsis Generation for Egocentric Videos Conference
International Conference on Pattern Recognition, 2020.
Tags: Egocentric, ICPR, Text Synopsis, Video Summarization | Links:
@conference{Sharghi2020,
title = {Text Synopsis Generation for Egocentric Videos},
author = {Aidean Sharghi and Niels da Vitoria Lobo and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2020/05/Publications_Text-Synopsis-Generation-for-Egocentric-Videos.pdf
https://youtu.be/Z2Rpy7MC7QI},
year = {2020},
date = {2020-05-08},
booktitle = {International Conference on Pattern Recognition},
keywords = {Egocentric, ICPR, Text Synopsis, Video Summarization},
pubstate = {published},
tppubtype = {conference}
}
Rizve, Mamshad Nayeem; Demir, Ugur; Tirupattur, Praveen; Rana, Aayush; Duarte, Kevin; Dave, Ishan; Rawat, Yogesh Singh; Shah, Mubarak
Gabriella: An Online System for Real-Time Activity Detection in Untrimmed Security Videos Conference
25th International Conference on Pattern Recognition, Italy, 10-15 January 2021 (ICPR 2020), 2020.
Tags: Activity Detection, IARPA DIVA, ICPR | Links:
@conference{Rizve2020,
title = {Gabriella: An Online System for Real-Time Activity Detection in Untrimmed Security Videos},
author = {Mamshad Nayeem Rizve and Ugur Demir and Praveen Tirupattur and Aayush Rana and Kevin Duarte and Ishan Dave and Yogesh Singh Rawat and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2020/05/Gabriella.pdf
https://www.crcv.ucf.edu/research/projects/gabriella-an-online-system-for-real-time-activity-detection-in-untrimmed-security-videos/},
year = {2020},
date = {2020-04-23},
urldate = {2020-04-23},
booktitle = {25th International Conference on Pattern Recognition, Italy, 10-15 January 2021 (ICPR 2020)},
keywords = {Activity Detection, IARPA DIVA, ICPR},
pubstate = {published},
tppubtype = {conference}
}
Zaeemzadeh, Alireza; Rahnavard, Nazanin; Shah, Mubarak
Norm-Preservation: Why Residual Networks Can Become Extremely Deep? Journal Article
In: IEEE Transactions on Pattern Analysis and Machine Intelligence, 2020.
Tags: Deep Learning, RESNET | Links:
@article{Zaeemzadeh2020,
title = {Norm-Preservation: Why Residual Networks Can Become Extremely Deep?},
author = {Alireza Zaeemzadeh and Nazanin Rahnavard and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2020/05/Norm-Preservation.pdf},
year = {2020},
date = {2020-04-19},
urldate = {2020-04-19},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
keywords = {Deep Learning, RESNET},
pubstate = {published},
tppubtype = {article}
}
Gupta, Rohit; Shah, Mubarak
RescueNet: Joint Building Segmentation and Damage Assessment from Satellite Imagery Conference
International Conference on Pattern Recognition, 2020.
Tags: Geo-Localization, ICPR, Segmentation | Links:
@conference{Gupta2020,
title = {RescueNet: Joint Building Segmentation and Damage Assessment from Satellite Imagery},
author = {Rohit Gupta and Mubarak Shah},
url = {https://arxiv.org/pdf/2004.07312.pdf},
year = {2020},
date = {2020-04-15},
booktitle = {International Conference on Pattern Recognition},
keywords = {Geo-Localization, ICPR, Segmentation},
pubstate = {published},
tppubtype = {conference}
}
Kalemaki, MS; Karantanas, AH; Exarchos, D; Detorakis, ET; Zoras, O; Marias, K; Millo, Corina; Bagci, Ulas; Pallikaris, I; Stratis, A; Karatzanis, I; Perisinakis, K; Koutentakis, P; Kontadakis, GA; Spandidos, DA; Tsatsakis, A; Papadakis, Georgios Z.
PET/CT and PET/MRI in Ophthalmic Oncology Journal Article
In: International Journal of Oncology, 2020.
Tags: Medical | Links:
@article{MS2020,
title = {PET/CT and PET/MRI in Ophthalmic Oncology},
author = {Kalemaki MS and Karantanas AH and Exarchos D and Detorakis ET and Zoras O and Marias K and Corina Millo and Ulas Bagci and Pallikaris I and Stratis A and Karatzanis I and Perisinakis K and Koutentakis P and Kontadakis GA and Spandidos DA and Tsatsakis A and Georgios Z. Papadakis},
url = {https://doi.org/10.3892/ijo.2020.4955},
year = {2020},
date = {2020-03-09},
journal = {International Journal of Oncology},
keywords = {Medical},
pubstate = {published},
tppubtype = {article}
}
LaLonde, Rodney; Kandel, P.; Spampinato, Concetto; Wallace, M. B.; Bagci, Ulas
Diagnosing Colorectal Polyps in the Wild with Capsule Networks Journal Article
In: IEEE ISBI , 2020.
Tags: Medical | Links:
@article{LaLonde2020,
title = {Diagnosing Colorectal Polyps in the Wild with Capsule Networks},
author = {Rodney LaLonde and Kandel, P. and Concetto Spampinato and M.B. Wallace and Ulas Bagci },
url = {https://github.com/lalonderodney/D-Caps},
year = {2020},
date = {2020-03-02},
journal = {IEEE ISBI },
keywords = {Medical},
pubstate = {published},
tppubtype = {article}
}
Chen, Chen; Surette, Ray; Shah, Mubarak
Automated monitoring for security camera networks: promise from computer vision labs Journal Article
In: Security Journal, 2020.
Tags: Deep Learning | Links:
@article{Chen2020,
title = {Automated monitoring for security camera networks: promise from computer vision labs},
author = {Chen Chen and Ray Surette and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2020/08/Publications_Automated-monitoring-for-security-camera-networks.pdf},
year = {2020},
date = {2020-02-17},
journal = {Security Journal},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Edraki, Marzieh; Rahnavard, Nazanin; Shah, Mubarak
Subspace Capsule Network Conference
34th Conference on Association for the Advancement of Artificial Intelligence (AAAI 2020), New York, USA, 2020.
Tags: Capsule Networks | Links:
@conference{Edraki2019,
title = {Subspace Capsule Network},
author = {Marzieh Edraki and Nazanin Rahnavard and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2020/03/Projects_SubspaceCapsuleNetwork.pdf
https://www.crcv.ucf.edu/research/projects/subspace-capsule-network/},
year = {2020},
date = {2020-02-07},
booktitle = {34th Conference on Association for the Advancement of Artificial Intelligence (AAAI 2020), New York, USA},
keywords = {Capsule Networks},
pubstate = {published},
tppubtype = {conference}
}
Masoodi, S.; Razi, A.; Wright, C.; Gatlin, J.; Bagci, Ulas
Instance-level Microtubule Tracking Journal Article
In: IEEE Transactions on Medical Imaging, 2020.
Tags: Medical | Links:
@article{Masoodi2019,
title = {Instance-level Microtubule Tracking},
author = {Masoodi, S. and Razi, A. and Wright, C. and Gatlin, J. and Ulas Bagci},
url = {https://doi.org/10.1109/TMI.2019.2963865},
year = {2020},
date = {2020-01-03},
journal = {IEEE Transactions on Medical Imaging},
keywords = {Medical},
pubstate = {published},
tppubtype = {article}
}
Quintanilla, Erik; Rawat, Yogesh Singh; Sakryukin, Andrey; Shah, Mubarak; Kankanhalli, Mohan
Adversarial Learning for Personalized Tag Recommendation Journal Article
In: IEEE Transactions on Multimedia (TMM), 2020.
Tags: REU | Links:
@article{quintanilla2020adversarial,
title = {Adversarial Learning for Personalized Tag Recommendation},
author = {Erik Quintanilla and Yogesh Singh Rawat and Andrey Sakryukin and Mubarak Shah and Mohan Kankanhalli},
url = {https://arxiv.org/pdf/2004.00698.pdf
Code: https://github.com/vyzuer/ALTReco
},
year = {2020},
date = {2020-01-01},
journal = {IEEE Transactions on Multimedia (TMM)},
keywords = {REU},
pubstate = {published},
tppubtype = {article}
}
2019
Tirosh, A.; RaviPrakash, H.; Papadakis, Georgios Z.; Tatsi, C.; Belyavskaya, E.; Chahralampos, L.; Lodish, MB.; Bagci, Ulas; Stratakis, Constantine A.
Computerized Analysis of Brain MR parameter dynamics in young patients with Cushing Syndrome – a case-control study Journal Article
In: The Journal of Clinical Endocrinology and Metabolism, 2019.
Tags: Medical | Links:
@article{Tirosh2019,
title = {Computerized Analysis of Brain MR parameter dynamics in young patients with Cushing Syndrome – a case-control study},
author = {Tirosh, A. and RaviPrakash, H. and Georgios Z. Papadakis and Tatsi, C. and Belyavskaya, E. and Chahralampos, L. and Lodish, MB. and Ulas Bagci and Constantine A. Stratakis},
url = {https://doi.org/10.1210/clinem/dgz303},
year = {2019},
date = {2019-12-30},
journal = {The Journal of Clinical Endocrinology and Metabolism},
keywords = {Medical},
pubstate = {published},
tppubtype = {article}
}
Hoogenboom, S.; Bagci, Ulas; Wallace, M. B.
AI in Gastroenterology. Current State of Play and Potential. How will it affect our practice and when? Journal Article
In: Techniques in Gastrointestinal Endoscopy, 150634, 2019.
Tags: Medical | Links:
@article{Hoogenboom2019,
title = {AI in Gastroenterology. Current State of Play and Potential. How will it affect our practice and when? },
author = {Hoogenboom, S. and Ulas Bagci and M.B. Wallace},
url = {https://doi.org/10.1016/j.tgie.2019.150634},
year = {2019},
date = {2019-12-29},
journal = {Techniques in Gastrointestinal Endoscopy, 150634},
keywords = {Medical},
pubstate = {published},
tppubtype = {article}
}
Stember, JN.; Celik, Haydar; Krupinski, E.; Chang, P.; Mutasa, S.; Wood, Bradford; Lignelli, A.; Moonis, G.; Jambawalikar, S.; Bagci, Ulas
Eye-Tracking for Deep Learning Segmentation Using Convolutional Neural Networks: a proof-of-principle application to meningiomas Journal Article
In: Journal of Digital Imaging, 2019.
Tags: Medical | Links:
@article{Stember2019,
title = {Eye-Tracking for Deep Learning Segmentation Using Convolutional Neural Networks: a proof-of-principle application to meningiomas},
author = {Stember, JN. and Haydar Celik and Krupinski, E. and Chang, P. and Mutasa, S. and Bradford Wood and Lignelli, A. and Moonis, G. and S. Jambawalikar and Ulas Bagci},
url = {https://doi.org/10.1007/s10278-019-00220-4},
year = {2019},
date = {2019-12-28},
journal = {Journal of Digital Imaging},
keywords = {Medical},
pubstate = {published},
tppubtype = {article}
}
Karaaslan, E.; Bagci, Ulas; Catbas, F. N.
Artificial Intelligence Assisted Infrastructure Assessment Using Mixed Reality Systems Journal Article
In: Journal of Transportation Research, 2019.
Tags: Medical | Links:
@article{Karaaslan2019,
title = {Artificial Intelligence Assisted Infrastructure Assessment Using Mixed Reality Systems},
author = {Karaaslan, E. and Ulas Bagci and Catbas, F.N.},
url = {https://doi.org/10.1177%2F0361198119839988},
year = {2019},
date = {2019-12-27},
journal = {Journal of Transportation Research},
keywords = {Medical},
pubstate = {published},
tppubtype = {article}
}
Torosdagli, N.; Liberton, Denise; Verma, Payal; Sincan, Murat; Lee, Janice; Bagci, Ulas
Deep Geodesic Learning for Segmentation and Anatomical Landmarking Journal Article
In: IEEE Transactions on Medical Imaging, 2019.
Tags: Medical | Links:
@article{Torosdagli2019,
title = {Deep Geodesic Learning for Segmentation and Anatomical Landmarking},
author = {N. Torosdagli and Denise Liberton and Payal Verma and Murat Sincan and Janice Lee and Ulas Bagci},
url = {https://doi.org/10.1109/TMI.2018.2875814},
year = {2019},
date = {2019-12-25},
journal = {IEEE Transactions on Medical Imaging},
keywords = {Medical},
pubstate = {published},
tppubtype = {article}
}
LaLonde, Rodney; Tanner, Irene; Nikiforaki, K.; Papadakis, Georgios Z.; Kandel, P.; Bolan, CW; Wallace, M. B.; Bagci, Ulas
INN: Inflated Neural Networks for IPMN Diagnosis Conference
MICCAI, 2019.
Tags: Medical | Links:
@conference{LaLonde2019,
title = {INN: Inflated Neural Networks for IPMN Diagnosis},
author = {Rodney LaLonde and Irene Tanner and Nikiforaki, K. and Georgios Z. Papadakis and Kandel, P. and Bolan, CW and M.B. Wallace and Ulas Bagci},
url = {https://doi.org/10.1007/978-3-030-32254-0_12},
year = {2019},
date = {2019-12-20},
booktitle = {MICCAI},
keywords = {Medical},
pubstate = {published},
tppubtype = {conference}
}
Khosravan, Naji; Mortazi, Aliasghar; Wallace, M. B.; Bagci, Ulas
PAN: Projective Adversarial Network for Medical Image Segmentation Conference
MICCAI, 2019.
Tags: Medical | Links:
@conference{Khosravan2019,
title = {PAN: Projective Adversarial Network for Medical Image Segmentation},
author = {Naji Khosravan and Aliasghar Mortazi and M.B. Wallace and Ulas Bagci},
url = {https://doi.org/10.1007/978-3-030-32226-7_8},
year = {2019},
date = {2019-12-19},
booktitle = {MICCAI},
keywords = {Medical},
pubstate = {published},
tppubtype = {conference}
}
Mortazi, Aliasghar; Khosravan, Naji; Torigian, DA; Kurugol, S.; Bagci, Ulas
Weakly Supervised Segmentation by A Deep Geodesic Prior Conference
MICCAI 2019-MLMI, 2019.
Tags: Medical | Links:
@conference{Mortazi2019,
title = {Weakly Supervised Segmentation by A Deep Geodesic Prior},
author = {Aliasghar Mortazi and Naji Khosravan and Torigian, DA and Kurugol, S. and Ulas Bagci},
url = {https://doi.org/10.1007/978-3-030-32692-0_28},
year = {2019},
date = {2019-12-17},
booktitle = {MICCAI 2019-MLMI},
keywords = {Medical},
pubstate = {published},
tppubtype = {conference}
}
Liu, Y.; Khosravan, Naji; Liu, Y.; Stember, J.; Bagci, Ulas; Jambawalikar, S.
Cross-modality Knowledge Transfer for Prostate Segmentation from CT Scans Conference
MICCAI 2019-DART, 2019.
Tags: Medical | Links:
@conference{Liu2019,
title = {Cross-modality Knowledge Transfer for Prostate Segmentation from CT Scans},
author = {Liu, Y. and Naji Khosravan and Liu, Y. and Stember, J. and Ulas Bagci and S. Jambawalikar},
url = {https://doi.org/10.1007/978-3-030-33391-1_8},
year = {2019},
date = {2019-12-15},
booktitle = {MICCAI 2019-DART},
keywords = {Medical},
pubstate = {published},
tppubtype = {conference}
}
Anwar, S.; Tooba, A.; Rafique, K.; RaviPrakash, H.; Mohy-ud-din, H.; Bagci, Ulas
A Survey on Recent Advancements for AI-Enabled Radiomics in Neuro-Oncology Conference
MICCAI 2019-RNO-AI, 2019.
Tags: Medical | Links:
@conference{Anwar2019,
title = {A Survey on Recent Advancements for AI-Enabled Radiomics in Neuro-Oncology},
author = {Anwar, S. and Tooba, A. and Rafique, K. and RaviPrakash, H. and Mohy-ud-din, H. and Ulas Bagci},
url = {https://doi.org/10.1007/978-3-030-40124-5_3},
year = {2019},
date = {2019-12-10},
booktitle = {MICCAI 2019-RNO-AI},
keywords = {Medical},
pubstate = {published},
tppubtype = {conference}
}
Mazaheri, Amir
Video Content Understanding Using Text PhD Thesis
University of Central Florida, 2019.
@phdthesis{Mazaheri2019b,
title = {Video Content Understanding Using Text},
author = {Amir Mazaheri},
url = {https://stars.library.ucf.edu/etd2020/99/},
year = {2019},
date = {2019-12-03},
urldate = {2019-12-02},
school = {University of Central Florida},
keywords = {},
pubstate = {published},
tppubtype = {phdthesis}
}
Khosravan, Naji
Collaborative Artificial Intelligence Algorithms For Medical Imaging Applications PhD Thesis
University of Central Florida, 2019.
@phdthesis{Khosravan2019b,
title = {Collaborative Artificial Intelligence Algorithms For Medical Imaging Applications},
author = {Naji Khosravan},
url = {https://stars.library.ucf.edu/etd/6877/},
year = {2019},
date = {2019-12-02},
urldate = {2019-12-01},
school = {University of Central Florida},
keywords = {},
pubstate = {published},
tppubtype = {phdthesis}
}
Sharghi, Aidean
Visual-Textual Video Synopsis Generation PhD Thesis
University of Central Florida, 2019.
@phdthesis{Sharghi2019,
title = {Visual-Textual Video Synopsis Generation},
author = {Aidean Sharghi},
url = {https://stars.library.ucf.edu/etd/6716/},
year = {2019},
date = {2019-12-01},
school = {University of Central Florida},
keywords = {},
pubstate = {published},
tppubtype = {phdthesis}
}
Mortazi, Aliasghar
Optimization Algorithms for Deep Learning Based Medical Image Segmentations PhD Thesis
University of Central Florida, 2019.
@phdthesis{Mortazi2019b,
title = {Optimization Algorithms for Deep Learning Based Medical Image Segmentations},
author = {Aliasghar Mortazi},
url = {https://stars.library.ucf.edu/etd/6715/},
year = {2019},
date = {2019-12-01},
school = {University of Central Florida},
keywords = {},
pubstate = {published},
tppubtype = {phdthesis}
}
Khodadadeh, Siavash; Bölöni, Ladislau; Shah, Mubarak
Unsupervised Meta-Learning for Few-Shot Image Classification Conference
33rd Conference on Neural Information Processing Systems (NeurIPS 2019), Vancouver, Canada, 2019.
Tags: Meta-Learning | Links:
@conference{Khodadadeh2019,
title = {Unsupervised Meta-Learning for Few-Shot Image Classification},
author = {Siavash Khodadadeh and Ladislau Bölöni and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2020/01/Publications_unsupervised-meta-learning-for-few-shot-image-classification.pdf
https://www.crcv.ucf.edu/research/projects/unsupervised-meta-learning-for-few-shot-image-and-video-classification/},
year = {2019},
date = {2019-11-30},
booktitle = {33rd Conference on Neural Information Processing Systems (NeurIPS 2019), Vancouver, Canada},
keywords = {Meta-Learning},
pubstate = {published},
tppubtype = {conference}
}
Kalayeh, Mahdi M.; Shah, Mubarak
On Symbiosis of Attribute Prediction and Semantic Segmentation Journal Article
In: IEEE Transactions on Pattern Analysis and Machine Intelligence, pp. 1-1, 2019.
Tags: Semantic Segmentation | Links:
@article{Kalayeh2019,
title = {On Symbiosis of Attribute Prediction and Semantic Segmentation},
author = {Mahdi M. Kalayeh and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2020/01/Publications_On-Symbiosis-of-Attribute-Prediction-and-Semantic-Segmentation.pdf},
year = {2019},
date = {2019-11-26},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
pages = {1-1},
doi = {10.1109/TPAMI.2019.2956039},
keywords = {Semantic Segmentation},
pubstate = {published},
tppubtype = {article}
}
Sultani, Waqas; Shah, Mubarak
Human Action Recognition in Drone Videos using a Few Aerial Training Examples Conference
Cornell University Library, arXiv:1910.10027v1. [cs.CV], 2019.
Tags: Action Recognition | Links:
@conference{Sultani2019,
title = {Human Action Recognition in Drone Videos using a Few Aerial Training Examples},
author = {Waqas Sultani and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2019/10/1910.10027v1.pdf},
year = {2019},
date = {2019-11-04},
publisher = {Cornell University Library, arXiv:1910.10027v1. [cs.CV]},
keywords = {Action Recognition},
pubstate = {published},
tppubtype = {conference}
}
Arif, Maliha; Mahalanobis, Abhijit
View Prediction using manifold learning in non-linear feature subspace Proceedings
SPIE Symposium on Multispectral Image Processing and Pattern Recognition, 2019.
Tags: Deep Learning | Links:
@proceedings{Maliha2019,
title = {View Prediction using manifold learning in non-linear feature subspace},
author = {Maliha Arif and Abhijit Mahalanobis},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Final_Ours_SPIE_submission_version2.pdf},
doi = {10.1117/12.2539521},
year = {2019},
date = {2019-11-01},
publisher = {SPIE Symposium on Multispectral Image Processing and Pattern Recognition},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {proceedings}
}
Aafaq, Nayyer; Mian, Ajmal; Liu, Wei; Gilani, Syed Zulqarnain; Shah, Mubarak
Video Description: A Survey of Methods, Datasets, and Evaluation Metrics Journal Article
In: ACM Computing Surveys, vol. 52, no. 6, Article 115 (October 2019), 37 pages, 2019.
Tags: Video Description | Links:
@article{Aafaq2019,
title = {Video Description: A Survey of Methods, Datasets, and Evaluation Metrics},
author = {Nayyer Aafaq and Ajmal Mian and Wei Liu and Syed Zulqarnain Gilani and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/a115-aafaq.pdf
},
year = {2019},
date = {2019-10-31},
journal = {ACM Computing Surveys, vol. 52, no. 6, Article 115 (October 2019), 37 pages},
doi = {10.1145/3355390},
keywords = {Video Description},
pubstate = {published},
tppubtype = {article}
}
Duarte, Kevin; Rawat, Yogesh Singh; Shah, Mubarak
CapsuleVOS: Semi-Supervised Video Object Segmentation Using Capsule Routing Conference
International Conference on Computer Vision (ICCV 2019), Seoul, South Korea, Oct 27-Nov 2, 2019.
Tags: Video Object Segmentation | Links:
@conference{Duarte2019,
title = {CapsuleVOS: Semi-Supervised Video Object Segmentation Using Capsule Routing},
author = {Kevin Duarte and Yogesh Singh Rawat and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2020/07/Projects_CapsuleVOS-Semi-Supervised-Video-Object-Segmentation-Using-Capsule-Routing.pdf
https://www.crcv.ucf.edu/research/projects/capsulevos-semi-supervised-video-object-segmentation-using-capsule-routing/},
year = {2019},
date = {2019-10-30},
publisher = { International Conference on Computer Vision (ICCV 2019), Seoul, South Korea, Oct 27-Nov 2},
keywords = {Video Object Segmentation},
pubstate = {published},
tppubtype = {conference}
}
Alemu, Leulseged Tesfaye; Pelillo, Marcello; Shah, Mubarak
Deep Constrained Dominant Sets for Person Re-Identification Conference
International Conference on Computer Vision (ICCV 2019), Seoul, South Korea, Oct 27-Nov 2, 2019.
Tags: Re-Identification | Links:
@conference{Alemu2019,
title = {Deep Constrained Dominant Sets for Person Re-Identification},
author = {Leulseged Tesfaye Alemu and Marcello Pelillo and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2019/08/Publications_Deep-Constrained-Dominant-Sets-for-Person-Re-Identification.pdf
https://www.crcv.ucf.edu/wp-content/uploads/2019/08/Publications_Deep-Constrained-Dominant-Sets-for-Person-Re-Identification_Supplementary.pdf
https://www.crcv.ucf.edu/research/projects/deep-constrained-dominant-sets-for-person-re-identification/},
year = {2019},
date = {2019-10-28},
publisher = {International Conference on Computer Vision (ICCV 2019), Seoul, South Korea, Oct 27-Nov 2},
keywords = {Re-Identification},
pubstate = {published},
tppubtype = {conference}
}
Regmi, Krishna; Shah, Mubarak
Bridging the Domain Gap for Ground-to-Aerial Image Matching Conference
International Conference on Computer Vision (ICCV 2019), Seoul, South Korea, Oct 27-Nov 2, 2019.
Tags: Cross-View Image Retrieval, Geo-Localization | Links:
@conference{Regmi2019,
title = { Bridging the Domain Gap for Ground-to-Aerial Image Matching},
author = {Krishna Regmi and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2020/02/Publications_Bridging-the-Domain-Gap-for-Ground-to-Aerial-Image-Matching_Final.pdf
https://www.crcv.ucf.edu/research/projects/bridging-the-domain-gap-for-ground-to-aerial-image-matching/
},
year = {2019},
date = {2019-10-27},
publisher = {International Conference on Computer Vision (ICCV 2019), Seoul, South Korea, Oct 27-Nov 2},
keywords = {Cross-View Image Retrieval, Geo-Localization},
pubstate = {published},
tppubtype = {conference}
}
Spampinato, Concetto; Palazzo, Simone; D’Oro, P.; Giordano, Daniela; Shah, Mubarak
Adversarial Framework for Unsupervised Learning of Motion Dynamics in Videos Journal Article
In: International Journal of Computer Vision, pp. 1-20, 2019.
Tags: Unsupervised Learning | Links:
@article{Spampinato2019,
title = {Adversarial Framework for Unsupervised Learning of Motion Dynamics in Videos},
author = {Concetto Spampinato and Simone Palazzo and P. D’Oro and Daniela Giordano and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2020/05/Publications_Adversarial-Framework-for-Unsupervised-Learning-of-Motion-Dynamics-in-Videos.pdf},
year = {2019},
date = {2019-10-08},
journal = {International Journal of Computer Vision},
pages = {1-20},
keywords = {Unsupervised Learning},
pubstate = {published},
tppubtype = {article}
}
Hou, Rui; Chen, Chen; Sukthankar, Rahul; Shah, Mubarak
An Efficient 3D CNN for Action/Object Segmentation in Video Conference
British Machine Vision Conference (BMVC 2019), UK, Sep 9-10, 2019.
Tags: BMVC, Video Object Segmentation | Links:
@conference{Hou2019,
title = { An Efficient 3D CNN for Action/Object Segmentation in Video},
author = {Rui Hou and Chen Chen and Rahul Sukthankar and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2019/07/Publications_An-Efficient-3D-CNN-for-Action-Object-Segmentation-in-Video.pdf
https://www.crcv.ucf.edu/research/projects/an-efficient-3d-cnn-for-action-object-segmentation-in-video/},
year = {2019},
date = {2019-09-10},
publisher = {British Machine Vision Conference (BMVC 2019), UK, Sep 9-10},
keywords = {BMVC, Video Object Segmentation},
pubstate = {published},
tppubtype = {conference}
}
Tesfaye, Yonatan Tariku; Zemene, Eyasu; Prati, Andrea; Pelillo, Marcello; Shah, Mubarak
Multi-target tracking in multiple non-overlapping cameras using constrained dominant sets Journal Article
In: International Journal of Computer Vision (IJCV), vol. 127, no. 9, pp. 1303-1320, September 2019.
Tags: Clustering, Dominant Sets, Reranking, Tracking, Video Re-ID | Links:
@article{Tesfaye2019,
title = { Multi-target tracking in multiple non-overlapping cameras using constrained dominant sets},
author = {Yonatan Tariku Tesfaye and Eyasu Zemene and Andrea Prati and Marcello Pelillo and Mubarak Shah },
url = {https://www.crcv.ucf.edu/wp-content/uploads/2019/06/Projects_MultiTargetTrackingConstrainedDominantSets.pdf
https://www.crcv.ucf.edu/home/projects/multi-target-tracking-in-non-overlapping-cameras-using-fast-constrained-dominant-sets/},
year = {2019},
date = {2019-09-09},
urldate = {2019-09-09},
journal = {International Journal of Computer Vision (IJCV)},
volume = {127},
number = {9},
pages = {1303-1320},
keywords = {Clustering, Dominant Sets, Reranking, Tracking, Video Re-ID},
pubstate = {published},
tppubtype = {article}
}
LaPlace, Cecilia; Khan, Aisha Urooj; Borji, Ali
Segmenting Sky Pixels in Images: Analysis and Comparison Conference
IEEE Winter Conference on Applications of Computer Vision, 2019.
Tags: REU
@conference{LaPlace2019,
title = {Segmenting Sky Pixels in Images: Analysis and Comparison},
author = {Cecilia LaPlace and Aisha Urooj Khan and Ali Borji },
year = {2019},
date = {2019-08-02},
booktitle = {IEEE Winter Conference on Applications of Computer Vision},
keywords = {REU},
pubstate = {published},
tppubtype = {conference}
}
Hou, Rui
Action Recognition, Temporal Localization and Detection in Trimmed and Untrimmed Video PhD Thesis
University of Central Florida, 2019.
@phdthesis{Hou2019b,
title = {Action Recognition, Temporal Localization and Detection in Trimmed and Untrimmed Video},
author = {Rui Hou},
url = {https://stars.library.ucf.edu/etd/6507/},
year = {2019},
date = {2019-08-01},
school = {University of Central Florida},
keywords = {},
pubstate = {published},
tppubtype = {phdthesis}
}
Sun, ShiJie; Akhtar, Naveed; Song, HuanSheng; Mian, Ajmal; Shah, Mubarak
Deep Affinity Network for Multiple Object Tracking Journal Article
In: IEEE Transactions on Pattern Analysis and Machine Intelligence, 2019.
Tags: Tracking | Links:
@article{Sun2019,
title = {Deep Affinity Network for Multiple Object Tracking},
author = {ShiJie Sun and Naveed Akhtar and HuanSheng Song and Ajmal Mian and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2020/05/Publications_Deep-Affinity-Network-for-Multiple-Object-Tracking.pdf},
year = {2019},
date = {2019-07-19},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
keywords = {Tracking},
pubstate = {published},
tppubtype = {article}
}
Abolghasemi, Pooya; Mazaheri, Amir; Shah, Mubarak; Bölöni, Ladislau
Pay Attention! – Robustifying a Deep Visuomotor Policy Through Task-Focused Visual Attention Conference
Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), 2019.
Tags: Robotics | Links:
@conference{abolghasemi2019payattention,
title = {Pay Attention! – Robustifying a Deep Visuomotor Policy Through Task-Focused Visual Attention},
author = {Pooya Abolghasemi and Amir Mazaheri and Mubarak Shah and Ladislau Bölöni},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2019/05/Publications_Pay_attention.pdf
https://www.crcv.ucf.edu/home/projects/pay-attention/},
year = {2019},
date = {2019-06-08},
publisher = {Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)},
keywords = {Robotics},
pubstate = {published},
tppubtype = {conference}
}
Zaeemzadeh, Alireza; Joneidi, Mohsen; Rahnavard, Nazanin; Shah, Mubarak
Iterative Projection and Matching: Finding Structure-preserving Representatives and Its Application to Computer Vision Conference
Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR), 2019.
Tags: Iterative Projection and Matching | Links:
@conference{zaeemzadeh2019ipm,
title = {Iterative Projection and Matching: Finding Structure-preserving Representatives and Its Application to Computer Vision},
author = {Alireza Zaeemzadeh and Mohsen Joneidi and Nazanin Rahnavard and Mubarak Shah},
url = {https://arxiv.org/pdf/1811.12326.pdf
https://www.crcv.ucf.edu/home/projects/iterative-projection-and-matching/
https://www.crcv.ucf.edu/wp-content/uploads/2019/05/Projects_IterativeProjectionMatching_Bibtex.txt},
year = {2019},
date = {2019-06-07},
publisher = {Proceedings of the IEEE conference on computer vision and pattern recognition (CVPR)},
keywords = {Iterative Projection and Matching},
pubstate = {published},
tppubtype = {conference}
}
Khosravan, Naji; Celik, Haydar; Turkbey, Baris; EC, Jones; Wood, Bradford; Bagci, Ulas
A collaborative computer aided diagnosis (C-CAD) system with eye-tracking, sparse attentional model, and deep learning Journal Article
In: Medical Image Analysis, vol. 51, pp. 101-115, 2019.
Tags: Attention, Eye-Tracking, Medical | Links:
@article{N2019,
title = { A collaborative computer aided diagnosis (C-CAD) system with eye-tracking, sparse attentional model, and deep learning},
author = {Naji Khosravan and Haydar Celik and Baris Turkbey and Jones EC and Bradford Wood and Ulas Bagci},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2019/05/Publications_C-CAD.pdf
},
year = {2019},
date = {2019-06-06},
journal = {Medical Image Analysis},
volume = {51},
pages = {101-115},
keywords = {Attention, Eye-Tracking, Medical},
pubstate = {published},
tppubtype = {article}
}
Mahalanobis, Abhijit
An overview of some techniques for the detection and recognition of objects in 3D data Proceedings
OSA Imaging and Applied Optics Congress, 2019.
Tags: Deep Learning
@proceedings{Mahalanobis2019,
title = {An overview of some techniques for the detection and recognition of objects in 3D data},
author = {Abhijit Mahalanobis},
year = {2019},
date = {2019-06-01},
publisher = {OSA Imaging and Applied Optics Congress},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {proceedings}
}
Mahalanobis, Abhijit; McIntosh, Bruce
A comparison of target detection algorithms using DSIAC ATR algorithm development data set Proceedings
Automatic Target Recognition XXIX, vol. 10988, 2019.
Tags: Deep Learning
@proceedings{Mahalanobis2019b,
title = {A comparison of target detection algorithms using DSIAC ATR algorithm development data set},
author = {Abhijit Mahalanobis and Bruce McIntosh},
year = {2019},
date = {2019-05-14},
volume = {10988},
publisher = {Automatic Target Recognition XXIX},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {proceedings}
}
Tanner, Irene; Mahalanobis, Abhijit
Fundamentals of target classification using deep learning Proceedings
Automatic Target Recognition XXIX, vol. 10988, 2019.
Tags: Deep Learning, REU
@proceedings{Tanner2019,
title = {Fundamentals of target classification using deep learning},
author = {Irene Tanner and Abhijit Mahalanobis},
year = {2019},
date = {2019-05-14},
urldate = {2019-05-14},
volume = {10988},
publisher = {Automatic Target Recognition XXIX},
keywords = {Deep Learning, REU},
pubstate = {published},
tppubtype = {proceedings}
}
Mahmoudkalayeh, Mahdi
Describing Images by Semantic Modeling Using Attributes and Tags PhD Thesis
University of Central Florida, 2019.
@phdthesis{Mahmoudkalayeh2019,
title = {Describing Images by Semantic Modeling Using Attributes and Tags},
author = {Mahdi Mahmoudkalayeh},
url = {https://stars.library.ucf.edu/etd/6296/},
year = {2019},
date = {2019-05-01},
school = {University of Central Florida},
keywords = {},
pubstate = {published},
tppubtype = {phdthesis}
}
Irmakci, I; Hussein, Sarfaraz; Savran, A; Kalyani, RR; Reiter, D; Chia, CW; Fishbein, KW; Spencer, RG; Ferrucci, L; Bagci, Ulas
A Novel Extension to Fuzzy Connectivity for Body Composition Analysis: Applications in Thigh, Brain, and Whole Body Tissue Segmentation Journal Article
In: IEEE Transactions on Biomedical Engineering, vol. 66, no. 4, pp. 1069-1081, 2019.
Tags: Medical, Segmentation | Links:
@article{I2019d,
title = { A Novel Extension to Fuzzy Connectivity for Body Composition Analysis: Applications in Thigh, Brain, and Whole Body Tissue Segmentation},
author = {Irmakci I and Sarfaraz Hussein and Savran A and Kalyani RR and Reiter D and Chia CW and Fishbein KW and Spencer RG and Ferrucci L and Ulas Bagci},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2019/05/Publications_NovelExtensionToFuzzyConnectivity.pdf
},
year = {2019},
date = {2019-04-30},
journal = {IEEE Transactions on Biomedical Engineering},
volume = {66},
number = {4},
pages = {1069-1081},
keywords = {Medical, Segmentation},
pubstate = {published},
tppubtype = {article}
}
Bagci, Ulas; Hussein, Sarfaraz
System and method for image-based quantification of white and brown adipose tissue at the whole-body, organ and body-region levels Journal Article
In: United States patent application US 10/157,462. 2018 Dec 18, 2019.
Tags: Medical, Segmentation | Links:
@article{U2018,
title = { System and method for image-based quantification of white and brown adipose tissue at the whole-body, organ and body-region levels},
author = {Ulas Bagci and Sarfaraz Hussein},
editor = {University of Central Florida Research Foundation Inc (UCFRF) and assignee},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2019/05/Publications_System-and-method-for-image-based-quantification-of-white-and-brown-adipose-tissue-at-the-whole-body-organ-and-body-region-levels.pdf},
year = {2019},
date = {2019-04-29},
journal = {United States patent application US 10/157,462. 2018 Dec 18},
keywords = {Medical, Segmentation},
pubstate = {published},
tppubtype = {article}
}
Hussein, Sarfaraz; P, Kandel; CW, Bolan; Wallace, M. B.; Bagci, Ulas
Lung and pancreatic tumor characterization in the deep learning era: novel supervised and unsupervised learning approaches Journal Article
In: IEEE Transactions on Medical Imaging, 2019.
Tags: Classification, Medical | Links:
@article{S2019,
title = { Lung and pancreatic tumor characterization in the deep learning era: novel supervised and unsupervised learning approaches},
author = {Sarfaraz Hussein and Kandel P and Bolan CW and M.B. Wallace and Ulas Bagci},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2019/05/Publications_Lung-and-Pancreatic-Tumor-Characterization-in-the-Deep-Learning-.pdf
},
year = {2019},
date = {2019-04-28},
journal = {IEEE Transactions on Medical Imaging},
keywords = {Classification, Medical},
pubstate = {published},
tppubtype = {article}
}
Bogunovic, H; Venhuizen, F; Klimscha, S; Apostolopoulos, S; Bab-Hadiashar, A; Bagci, Ulas; Beg, MF; Bekalo, L; Chen, Q; Ciller, C; Gopinath, K
RETOUCH: The Retinal OCT Fluid Detection and Segmentation Benchmark and Challenge Journal Article
In: IEEE Transactions on Medical Imaging, 2019.
Tags: Detection, Segmentation | Links:
@article{H2019b,
title = {RETOUCH: The Retinal OCT Fluid Detection and Segmentation Benchmark and Challenge},
author = {Bogunovic H and Venhuizen F and Klimscha S and Apostolopoulos S and Bab-Hadiashar A and Ulas Bagci and Beg MF and Bekalo L and Chen Q and Ciller C and Gopinath K},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2019/05/Publications_RETOUCH.pdf},
year = {2019},
date = {2019-04-27},
journal = {IEEE Transactions on Medical Imaging},
keywords = {Detection, Segmentation},
pubstate = {published},
tppubtype = {article}
}
Vaca-Castano, Gonzalo; da Vitoria Lobo, Niels; Shah, Mubarak
Holistic Object Detection and Image Understanding Journal Article
In: Computer Vision and Image Understanding, vol. 181, pp. 1-13, 2019.
@article{VacaCastano2019,
title = {Holistic Object Detection and Image Understanding},
author = {Gonzalo Vaca-Castano and Niels da Vitoria Lobo and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/Publications_Holistic-object-detection-and-image-understanding.pdf
https://www.crcv.ucf.edu/research/projects/holistic-object-detection-and-image-understanding/},
doi = {https://doi.org/10.1016/j.cviu.2019.02.006},
year = {2019},
date = {2019-04-01},
urldate = {2019-04-01},
journal = {Computer Vision and Image Understanding},
volume = {181},
pages = {1-13},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Vinsard, DG; Mori, Y; Misawa, M; Kudo, SE; Rastogi, A; Bagci, Ulas; Rex, DK; Wallace, M. B.
Quality Assurance of Computer-Aided Detection and Diagnosis in Colonoscopy Journal Article
In: Gastrointestinal Endoscopy, 2019.
Tags: Detection, Medical | Links:
@article{DG2019,
title = {Quality Assurance of Computer-Aided Detection and Diagnosis in Colonoscopy},
author = {Vinsard DG and Mori Y and Misawa M and Kudo SE and Rastogi A and Ulas Bagci and Rex DK and M.B. Wallace},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2019/05/Publications_Quality-Assurance-of-Computer-Aided-Detection-and-Diagnosis-in-Colonoscopy.pdf},
year = {2019},
date = {2019-03-26},
journal = {Gastrointestinal Endoscopy},
keywords = {Detection, Medical},
pubstate = {published},
tppubtype = {article}
}
Rawat, Yogesh Singh; Shah, Mubarak; Kankanhalli, Mohan
Photography and Exploration of Tourist Locations Based on Optimal Foraging Theory Journal Article
In: IEEE Transactions on Circuits and Systems for Video Technology, 2019.
@article{rawat2019photography,
title = {Photography and Exploration of Tourist Locations Based on Optimal Foraging Theory},
author = {Yogesh Singh Rawat and Mubarak Shah and Mohan Kankanhalli},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2019/09/NsfProjects_BigData_Photography-and-Exploration-of-Tourist-Locations-Based-on-Optimal-Foraging-Theory.pdf
https://github.com/vyzuer/foraging_theory},
year = {2019},
date = {2019-01-01},
journal = {IEEE Transactions on Circuits and Systems for Video Technology},
publisher = {IEEE},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Lobo, Niels Da Vitoria; Shah, Mubarak
UCF's 30-Year REU Site in Computer Vision Journal Article
In: Communications of the ACM, vol. 62, no. 1, pp. 31-34, January 2019.
@article{Lobo2019,
title = {UCF's 30-Year REU Site in Computer Vision},
author = {Niels Da Vitoria Lobo and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2020/05/Publications_UCF30YearREUSiteinComputerVision.pdf},
year = {2019},
date = {2019-01-01},
journal = {Communications of the ACM},
volume = {62},
number = {1},
pages = {31-34},
keywords = {NSF, REU},
pubstate = {published},
tppubtype = {article}
}
Rana, Aayush; Tirupattur, Praveen; Rizve, Mamshad Nayeem; Duarte, Kevin; Demir, Ugur; Rawat, Yogesh Singh; Shah, Mubarak
An Online System for Real-Time Activity Detection in Untrimmed Surveillance Videos Journal Article
In: 2019.
Tags:
@article{ranaonline,
title = {An Online System for Real-Time Activity Detection in Untrimmed Surveillance Videos},
author = {Aayush Rana and Praveen Tirupattur and Mamshad Nayeem Rizve and Kevin Duarte and Ugur Demir and Yogesh Singh Rawat and Mubarak Shah},
year = {2019},
date = {2019-00-00},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2018
Tian, Yicong
Human Action Detection, Tracking and Segmentation in Videos PhD Thesis
University of Central Florida, 2018.
@phdthesis{Tian2018b,
title = {Human Action Detection, Tracking and Segmentation in Videos},
author = {Yicong Tian},
url = {https://stars.library.ucf.edu/etd/6159/},
year = {2018},
date = {2018-12-31},
urldate = {2018-12-31},
school = {University of Central Florida},
keywords = {},
pubstate = {published},
tppubtype = {phdthesis}
}
Ardeshir, Shervin
Relating First-person and Third-person Vision PhD Thesis
University of Central Florida, 2018.
@phdthesis{Ardeshir0000,
title = {Relating First-person and Third-person Vision},
author = {Shervin Ardeshir},
url = {https://stars.library.ucf.edu/cgi/viewcontent.cgi?article=6960&context=etd},
year = {2018},
date = {2018-08-02},
urldate = {2018-08-01},
school = {University of Central Florida},
keywords = {},
pubstate = {published},
tppubtype = {phdthesis}
}
Hussein, Sarfaraz
Learning Algorithms for Fat Quantification and Tumor Characterization PhD Thesis
University of Central Florida, 2018.
@phdthesis{Hussein0000,
title = {Learning Algorithms for Fat Quantification and Tumor Characterization},
author = {Sarfaraz Hussein},
url = {https://www.crcv.ucf.edu/papers/theses/Hussein},
year = {2018},
date = {2018-08-01},
urldate = {2018-08-01},
school = {University of Central Florida},
keywords = {},
pubstate = {published},
tppubtype = {phdthesis}
}
Chuquicusma, Maria Mosquera; Hussein, Sarfaraz; Bagci, Ulas
How to Fool Radiologists with Generative Adversarial Networks? A Visual Turing Test for Lung Cancer Diagnosis Conference
IEEE 15th International Symposium on Biomedical Imaging (ISBI 2018), 2018.
Tags: REU
@conference{Chuquicusma2018,
title = {How to Fool Radiologists with Generative Adversarial Networks? A Visual Turing Test for Lung Cancer Diagnosis},
author = {Maria Mosquera Chuquicusma and Sarfaraz Hussein and Ulas Bagci},
year = {2018},
date = {2018-08-01},
booktitle = {IEEE 15th International Symposium on Biomedical Imaging (ISBI 2018)},
keywords = {REU},
pubstate = {published},
tppubtype = {conference}
}
Mazaheri, Amir; Gong, Boqing; Shah, Mubarak
Learning a Multi-Concept Video Retrieval Model with Multiple Latent Variables Journal Article
In: ACM Trans. Multimedia Comput. Commun. Appl. 14, 2, Article 46 (April 2018), 21 pages, 2018.
Tags: Video Retrieval | Links:
@article{Mazaheri2019,
title = { Learning a Multi-Concept Video Retrieval Model with Multiple Latent Variables},
author = {Amir Mazaheri and Boqing Gong and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/acm-tomm-14.2.46.pdf},
year = {2018},
date = {2018-04-14},
journal = {ACM Trans. Multimedia Comput. Commun. Appl. 14, 2, Article 46 (April 2018), 21 pages},
keywords = {Video Retrieval},
pubstate = {published},
tppubtype = {article}
}
Tirupattur, Praveen; Rawat, Yogesh Singh; Spampinato, Concetto; Shah, Mubarak
ThoughtViz: Visualizing Human Thoughts Using Generative Adversarial Network Conference
ACM Multimedia 2018, Seoul, Korea, October 22-26, 2018., 2018.
Tags: EEG/Brain, Gans | Links:
@conference{Tirupattur2019,
title = { ThoughtViz: Visualizing Human Thoughts Using Generative Adversarial Network},
author = {Praveen Tirupattur and Yogesh Singh Rawat and Concetto Spampinato and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/acmmm18/thoughtviz.pdf},
year = {2018},
date = {2018-04-13},
urldate = {2018-04-13},
publisher = {ACM Multimedia 2018, Seoul, Korea, October 22-26, 2018.},
keywords = {EEG/Brain, Gans},
pubstate = {published},
tppubtype = {conference}
}
Mazaheri, Amir; Shah, Mubarak
Visual Text Correction Conference
Proceedings of IEEE European Conference on Computer Vision (ECCV 2018), Munich, Germany, September 8-14, 2018., 2018.
Tags: Video Description | Links:
@conference{Mazaheri2018,
title = {Visual Text Correction},
author = {Amir Mazaheri and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/eccv2018/vtc.pdf
https://amirmazaheri1990.github.io/VTC/},
year = {2018},
date = {2018-04-12},
publisher = {Proceedings of IEEE European Conference on Computer Vision (ECCV 2018), Munich, Germany, September 8-14, 2018.},
keywords = {Video Description},
pubstate = {published},
tppubtype = {conference}
}
Idrees, Haroon; Tayyab, Muhammad; Athrey, Kishan; Zhang, Dong; Al-Maadeed, Somaya; Rajpoot, Nasir; Shah, Mubarak
Composition Loss for Counting, Density Map Estimation and Localization in Dense Crowds Conference
IEEE European Conference on Computer Vision (ECCV 2018), Munich, Germany, September 8-14, 2018., 2018.
Tags: Crowd Counting | Links:
@conference{Idrees2018,
title = { Composition Loss for Counting, Density Map Estimation and Localization in Dense Crowds},
author = {Haroon Idrees and Muhammad Tayyab and Kishan Athrey and Dong Zhang and Somaya Al-Maadeed and Nasir Rajpoot and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/eccv2018/2324.pdf
},
year = {2018},
date = {2018-04-11},
publisher = { IEEE European Conference on Computer Vision (ECCV 2018), Munich, Germany, September 8-14, 2018.},
keywords = {Crowd Counting},
pubstate = {published},
tppubtype = {conference}
}
Tian, Yicong; Dehghan, Afshin; Shah, Mubarak
"On Detection, Data Association and Segmentation for Multi-target Tracking" in IEEE Transactions on Pattern Analysis and Machine Intelligence Journal Article
In: 2018.
Tags: Tracking | Links:
@article{Tian2018,
title = {"On Detection, Data Association and Segmentation for Multi-target Tracking" in IEEE Transactions on Pattern Analysis and Machine Intelligence},
author = {Yicong Tian and Afshin Dehghan and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/08392447.pdf
https://ieeexplore.ieee.org/abstract/document/8392447/
},
year = {2018},
date = {2018-04-10},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
keywords = {Tracking},
pubstate = {published},
tppubtype = {article}
}
Jamal, M.; Li, H.; Gong, Boqing
Deep Face Detector Adaptation Without Negative Transfer or Catastrophic Forgetting Conference
IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2018), Salt Lake City, UT, June 18-22, 2018., 2018.
Tags: Detection | Links:
@conference{Jamal2019,
title = {Deep Face Detector Adaptation Without Negative Transfer or Catastrophic Forgetting},
author = {M. Jamal and H. Li and Boqing Gong},
url = {https://www.crcv.ucf.edu/papers/cvpr2018/deep-face.pdf},
year = {2018},
date = {2018-04-09},
publisher = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2018), Salt Lake City, UT, June 18-22, 2018.},
keywords = {Detection},
pubstate = {published},
tppubtype = {conference}
}
LaLonde, Rodney; Zhang, Dong; Shah, Mubarak
ClusterNet: Detecting Small Objects in Large Scenes by Exploiting Spatio-Temporal Information Conference
IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2018), Salt Lake City, UT, June 18-22, 2018, 2018.
Tags: Detection, Drone Video Analysis, UAV Video Analysis | Links:
@conference{LaLonde2018,
title = {ClusterNet: Detecting Small Objects in Large Scenes by Exploiting Spatio-Temporal Information},
author = {Rodney LaLonde and Dong Zhang and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/cvpr2018/3460Final.pdf
https://www.crcv.ucf.edu/papers/cvpr2018/3460-suppFinal.pdf},
year = {2018},
date = {2018-04-08},
urldate = {2018-04-08},
publisher = { IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2018), Salt Lake City, UT, June 18-22, 2018},
keywords = {Detection, Drone Video Analysis, UAV Video Analysis},
pubstate = {published},
tppubtype = {conference}
}
Wang, Tiantian; Zhang, Lihe; Wang, Shuo; Lu, Huchuan; Yang, Gang; Ruan, Xiang; Borji, Ali
Detect Globally, Refine Locally: A Novel Approach to Saliency Detection Conference
IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2018), Salt Lake City, UT, June 18-22, 2018., 2018.
Tags: Saliency Detection | Links:
@conference{Wang2018,
title = { Detect Globally, Refine Locally: A Novel Approach to Saliency Detection},
author = {Tiantian Wang and Lihe Zhang and Shuo Wang and Huchuan Lu and Gang Yang and Xiang Ruan and Ali Borji},
url = {https://www.crcv.ucf.edu/papers/cvpr2018/camera_ready.pdf
},
year = {2018},
date = {2018-04-07},
publisher = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2018), Salt Lake City, UT, June 18-22, 2018.},
keywords = {Saliency Detection},
pubstate = {published},
tppubtype = {conference}
}
Regmi, Krishna; Borji, Ali
Cross-View Image Synthesis Using Conditional GANs Conference
IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2018), Salt Lake City, UT, June 18-22, 2018, 2018.
Tags: Cross-View Image Synthesis, Geo-Localization | Links:
@conference{Regmi2018,
title = { Cross-View Image Synthesis Using Conditional GANs},
author = {Krishna Regmi and Ali Borji},
url = {https://www.crcv.ucf.edu/papers/cvpr2018/cross-view.pdf
https://www.crcv.ucf.edu/research/projects/cross-view-image-synthesis/},
year = {2018},
date = {2018-04-06},
urldate = {2018-04-06},
publisher = { IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2018), Salt Lake City, UT, June 18-22, 2018},
keywords = {Cross-View Image Synthesis, Geo-Localization},
pubstate = {published},
tppubtype = {conference}
}
Khan, Aisha Urooj; Borji, Ali
Analysis of Hand Segmentation in the Wild Conference
IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2018), Salt Lake City, UT, June 18-22, 2018, 2018.
Tags: Segmentation | Links:
@conference{Urooj2018,
title = { Analysis of Hand Segmentation in the Wild},
author = {Aisha Urooj Khan and Ali Borji},
url = {https://www.crcv.ucf.edu/papers/cvpr2018/hand-segmentation.pdf},
year = {2018},
date = {2018-04-05},
publisher = { IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2018), Salt Lake City, UT, June 18-22, 2018},
keywords = {Segmentation},
pubstate = {published},
tppubtype = {conference}
}
Wang, Wenguan; Shen, Jianbing; Guo, Fang; Cheng, Ming-Ming; Borji, Ali
Revisiting Video Saliency: A Large-Scale Benchmark and a New Model Conference
IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2018), Salt Lake City, UT, June 18-22, 2018., 2018.
Tags: Saliency Detection | Links:
@conference{Wang2018b,
title = { Revisiting Video Saliency: A Large-Scale Benchmark and a New Model},
author = {Wenguan Wang and Jianbing Shen and Fang Guo and Ming-Ming Cheng and Ali Borji},
url = {https://www.crcv.ucf.edu/papers/cvpr2018/revisiting-saliency.pdf},
year = {2018},
date = {2018-04-04},
publisher = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2018), Salt Lake City, UT, June 18-22, 2018.},
keywords = {Saliency Detection},
pubstate = {published},
tppubtype = {conference}
}
Zeng, Yu; Lu, Huchuan; Zhang, Lihe; Feng, Mengyang; Borji, Ali
Learning to Promote Saliency Detectors Conference
IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2018), Salt Lake City, UT, June 18-22, 2018., 2018.
Tags: Saliency Detection | Links:
@conference{Zeng2018,
title = {Learning to Promote Saliency Detectors},
author = {Yu Zeng and Huchuan Lu and Lihe Zhang and Mengyang Feng and Ali Borji},
url = {https://www.crcv.ucf.edu/papers/cvpr2018/1757.pdf},
year = {2018},
date = {2018-04-03},
publisher = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2018), Salt Lake City, UT, June 18-22, 2018.},
keywords = {Saliency Detection},
pubstate = {published},
tppubtype = {conference}
}
Sultani, Waqas; Chen, Chen; Shah, Mubarak
Real-world Anomaly Detection in Surveillance Videos Conference
IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2018), Salt Lake City, UT, June 18-22, 2018., 2018.
Tags: Anomaly Detection | Links:
@conference{Sultani2018,
title = {Real-world Anomaly Detection in Surveillance Videos},
author = {Waqas Sultani and Chen Chen and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/anomaly_detection.pdf
},
year = {2018},
date = {2018-04-03},
publisher = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2018), Salt Lake City, UT, June 18-22, 2018.},
keywords = {Anomaly Detection},
pubstate = {published},
tppubtype = {conference}
}
Zaeemzadeh, Alireza; Rahnavard, Nazanin; Shah, Mubarak
Norm-Preservation: Why Residual Networks Can Become Extremely Deep? Conference
Cornell University Library, 2018.
Tags: Deep Learning | Links:
@conference{Zaeemzadeh2018,
title = {Norm-Preservation: Why Residual Networks Can Become Extremely Deep?},
author = {Alireza Zaeemzadeh and Nazanin Rahnavard and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/arxiv_files/1805.07477.pdf},
year = {2018},
date = {2018-04-02},
publisher = {Cornell University Library},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Duarte, Kevin; Rawat, Yogesh Singh; Shah, Mubarak
VideoCapsuleNet: A Simplified Network for Action Detection Conference
Advances in Neural Information Processing Systems (pp. 7610-7619), 2018.
Tags: Human Action and Activity Recognition | Links:
@conference{duarte2018videocapsulenet,
title = {VideoCapsuleNet: A Simplified Network for Action Detection},
author = {Kevin Duarte and Yogesh Singh Rawat and Mubarak Shah},
url = {https://papers.nips.cc/paper/7988-videocapsulenet-a-simplified-network-for-action-detection.pdf
https://www.crcv.ucf.edu/home/projects/videocapsulenet/
https://www.crcv.ucf.edu/wp-content/uploads/2019/05/Projects_VideoCapsuleNet_Bibtex.txt},
year = {2018},
date = {2018-04-01},
publisher = {Advances in Neural Information Processing Systems (pp. 7610-7619)},
keywords = {Human Action and Activity Recognition},
pubstate = {published},
tppubtype = {conference}
}
Kalayeh, Mahdi M.; Basaran, Emrah; Gokmen, Muhittin; Kamasak, Mustafa E.; Shah, Mubarak
Human Semantic Parsing for Person Re-identification Conference
IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2018), Salt Lake City, UT, June 18-22, 2018, 2018.
Tags: Re-Identification | Links:
@conference{Kalayeh2018,
title = { Human Semantic Parsing for Person Re-identification},
author = {Mahdi M. Kalayeh and Emrah Basaran and Muhittin Gokmen and Mustafa E. Kamasak and Mubarak Shah},
url = {http://openaccess.thecvf.com/content_cvpr_2018/papers/Kalayeh_Human_Semantic_Parsing_CVPR_2018_paper.pdf
https://www.crcv.ucf.edu/papers/cvpr2018/semantic_parsing.tex},
year = {2018},
date = {2018-03-31},
publisher = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2018), Salt Lake City, UT, June 18-22, 2018},
keywords = {Re-Identification},
pubstate = {published},
tppubtype = {conference}
}
Zemene, Eyasu; Tesfaye, Yonatan Tariku; Idrees, Haroon; Prati, Andrea; Pelillo, Marcello; Shah, Mubarak
Large-scale Image Geo-Localization Using Dominant Sets Journal Article
In: IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 41, pp. 148 - 161, 2018.
Tags: Clustering, Dominant Sets, Geo-Localization, Re-Ranking | Links:
@article{Mequanint2020,
title = {Large-scale Image Geo-Localization Using Dominant Sets},
author = {Eyasu Zemene and Yonatan Tariku Tesfaye and Haroon Idrees and Andrea Prati and Marcello Pelillo and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2020/05/Publications_Large-Scale-Image-Geo-Localization-Using-Dominant-Sets.pdf
https://www.crcv.ucf.edu/research/projects/large-scale-image-geo-localization-using-dominant-sets/},
year = {2018},
date = {2018-01-01},
urldate = {2018-01-01},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
volume = {41},
pages = {148 - 161},
keywords = {Clustering, Dominant Sets, Geo-Localization, Re-Ranking},
pubstate = {published},
tppubtype = {article}
}
Rawat, Yogesh Singh; Rana, Aayush; Tirupattur, Praveen; Shah, Mubarak
Action and Object Detection for TRECVID Journal Article
In: 2018.
Tags:
@article{rawat2018action,
title = {Action and Object Detection for TRECVID},
author = {Yogesh Singh Rawat and Aayush Rana and Praveen Tirupattur and Mubarak Shah},
year = {2018},
date = {2018-01-01},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2017
Green, Aileen; Bagci, Ulas; Hussein, Sarfaraz; Kelly, Patrick V.; Muzaffar, Razi; Neuschwander-Tetri, Brent A.; Osman, Medhat
Brown adipose tissue detected by PET/CT imaging is associated with less central obesity Journal Article
In: vol. 38, pp. 629-635, 2017.
Tags: Deep Learning | Links:
@article{Greena2017b,
title = {Brown adipose tissue detected by PET/CT imaging is associated with less central obesity},
author = {Aileen Green and Ulas Bagci and Sarfaraz Hussein and Patrick V. Kelly and Razi Muzaffar and Brent A. Neuschwander-Tetri and Medhat Osman},
url = {https://www.crcv.ucf.edu/papers/nmc17.pdf},
year = {2017},
date = {2017-12-31},
volume = {38},
pages = {629-635},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Idrees, Haroon; Zamir, Amir Roshan; Jiang, Yu-Gang; Gorban, Alex; Laptev, Ivan; Sukthankar, Rahul; Shah, Mubarak
The THUMOS Challenge on Action Recognition for Videos "in the Wild" Journal Article
In: Computer Vision and Image Understanding, 2017.
Tags: Deep Learning | Links:
@article{Idrees2017,
title = {The THUMOS Challenge on Action Recognition for Videos "in the Wild"},
author = {Haroon Idrees and Amir Roshan Zamir and Yu-Gang Jiang and Alex Gorban and Ivan Laptev and Rahul Sukthankar and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/thumosCVIU.pdf},
year = {2017},
date = {2017-12-30},
journal = {Computer Vision and Image Understanding},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Dehghan, Afshin; Shah, Mubarak
Binary Quadratic Programing for Online Tracking of Hundreds of People in Extremely Crowded Scenes Journal Article
In: Transactions on Pattern Analysis and Machine Intelligence, 2017.
Tags: Crowd Analysis, Deep Learning | Links:
@article{Dehghan2017,
title = {Binary Quadratic Programing for Online Tracking of Hundreds of People in Extremely Crowded Scenes},
author = {Afshin Dehghan and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2020/05/Publications_Binary-Quadratic-Programing-for-Online-Tracking-of-Hundreds-of-People-in-Extremely-Crowded-Scenes.pdf},
year = {2017},
date = {2017-12-28},
urldate = {2017-12-28},
journal = {Transactions on Pattern Analysis and Machine Intelligence},
keywords = {Crowd Analysis, Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Rahmani, Hossein; Mian, Ajmal; Shah, Mubarak
Learning a Deep Model for Human Action Recognition from Novel Viewpoints Journal Article
In: Transactions on Pattern Analysis and Machine Intelligence, 2017.
Tags: Deep Learning | Links:
@article{Rahmani2017,
title = {Learning a Deep Model for Human Action Recognition from Novel Viewpoints },
author = {Hossein Rahmani and Ajmal Mian and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/07893732.pdf},
year = {2017},
date = {2017-12-26},
journal = {Transactions on Pattern Analysis and Machine Intelligence},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Komatsu, S.; Markman, A.; Mahalanobis, Abhijit; Chen, Kenny; Javidi, Bahram
Three-dimensional integral imaging and object detection using long-wave infrared imaging Journal Article
In: Applied Optics , 2017.
Tags: Deep Learning
@article{Komatsu2017,
title = {Three-dimensional integral imaging and object detection using long-wave infrared imaging},
author = {S. Komatsu and A. Markman and Abhijit Mahalanobis and Kenny Chen and Bahram Javidi },
year = {2017},
date = {2017-12-12},
journal = {Applied Optics },
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Mahalanobis, Abhijit; Shilling, R.; Muise, Robert; Neifeld, Mark
High resolution imaging using a translating coded aperture Journal Article
In: Optical Engineering, vol. 56, no. 8, 2017.
Tags: Deep Learning
@article{Mahalanobis2017,
title = {High resolution imaging using a translating coded aperture},
author = {Abhijit Mahalanobis and R. Shilling and Robert Muise and Mark Neifeld},
year = {2017},
date = {2017-08-22},
journal = {Optical Engineering },
volume = {56},
number = {8},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Prokaj, Jan; da Vitoria Lobo, Niels
Scale Space Based Grammar for Hand Detection Conference
Springer Lecture Notes in Computer Science, 2017.
Tags: REU
@conference{Prokaj2017,
title = {Scale Space Based Grammar for Hand Detection},
author = {Jan Prokaj and Niels da Vitoria Lobo },
year = {2017},
date = {2017-08-08},
booktitle = {Springer Lecture Notes in Computer Science},
keywords = {REU},
pubstate = {published},
tppubtype = {conference}
}
Finocchiaro, Jessica; Khan, AU; Borji, Ali
Egocentric Height Estimation Conference
WACV, 2017.
Tags: REU
@conference{Finocchiaro2017b,
title = { Egocentric Height Estimation},
author = {Jessica Finocchiaro and AU Khan and Ali Borji},
year = {2017},
date = {2017-08-02},
booktitle = {WACV},
keywords = {REU},
pubstate = {published},
tppubtype = {conference}
}
Laurel, Jacob; Sharghi, Aidean; et al.
Query-focused video summarization: Dataset, Evaluation, and a Memory Network Based Approach Conference
CVPR, 2017.
Tags: REU
@conference{Laurel2017,
title = {Query-focused video summarization: Dataset, Evaluation, and a Memory Network Based Approach},
author = {Jacob Laurel and Aidean Sharghi and others},
year = {2017},
date = {2017-08-01},
booktitle = {CVPR},
keywords = {REU},
pubstate = {published},
tppubtype = {conference}
}
Sharghi, Aidean; Laurel, J.; Gong, Boqing
Query-Focused Video Summarization: Dataset, Evaluation, and A Memory Network Based Approach Conference
IEEE Conference on Computer Vision and Pattern Recognition, 2017.
Tags: REU
@conference{Sharghi2017b,
title = {Query-Focused Video Summarization: Dataset, Evaluation, and A Memory Network Based Approach},
author = {Aidean Sharghi and J. Laurel and Boqing Gong },
year = {2017},
date = {2017-07-22},
booktitle = { IEEE Conference on Computer Vision and Pattern Recognition},
keywords = {REU},
pubstate = {published},
tppubtype = {conference}
}
Spampinato, Concetto; Palazzo, Simone; Kavasidis, Isaak; Giordano, Daniela; Shah, Mubarak; Souly, Nasim
Deep Learning Human Mind for Automated Visual Classification Conference
IEEE Conference on Computer Vision and Pattern Recognition, 2017.
Tags: Classification, CVPR, EEG/Brain | Links:
@conference{Spampinato2017,
title = {Deep Learning Human Mind for Automated Visual Classification},
author = {Concetto Spampinato and Simone Palazzo and Isaak Kavasidis and Daniela Giordano and Mubarak Shah and Nasim Souly},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2020/08/Publications_Deep-Learning-Human-Mind-for-Automated-Visual-Classification.pdf},
year = {2017},
date = {2017-07-21},
urldate = {2017-07-21},
publisher = {IEEE Conference on Computer Vision and Pattern Recognition},
keywords = {Classification, CVPR, EEG/Brain},
pubstate = {published},
tppubtype = {conference}
}
Komatsu, S.; Markman, A.; Mahalanobis, Abhijit; Chen, Kenny; Javidi, Bahram
Passive long-wave infrared three-dimensional integral imaging for face detection and depth estimation: an overview Proceedings
Three-Dimensional Imaging, Visualization, and Display , 2017.
Tags: Deep Learning | Links:
@proceedings{Komatsu2020,
title = {Passive long-wave infrared three-dimensional integral imaging for face detection and depth estimation: an overview},
author = {S. Komatsu and A. Markman and Abhijit Mahalanobis and Kenny Chen and Bahram Javidi},
url = {https://www.spiedigitallibrary.org/conference-proceedings-of-spie/10219/1021918/Passive-long-wave-infrared-three-dimensional-integral-imaging-for-face/10.1117/12.2276286.short},
year = {2017},
date = {2017-05-10},
publisher = {Three-Dimensional Imaging, Visualization, and Display },
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {proceedings}
}
Hussein, Sarfaraz; Gillies, Robert; Cao, Kunlin; Song, Qi; Bagci, Ulas
TumorNET: Lung Nodule Characterization using Multi-View Convolutional Neural Network with Gaussian Process Conference
IEEE ISBI, 2017.
Tags: Deep Learning | Links:
@conference{Hussein2017b,
title = {TumorNET: Lung Nodule Characterization using Multi-View Convolutional Neural Network with Gaussian Process},
author = {Sarfaraz Hussein and Robert Gillies and Kunlin Cao and Qi Song and Ulas Bagci},
url = {https://www.crcv.ucf.edu/papers/1703.00645.pdf},
year = {2017},
date = {2017-04-10},
publisher = {IEEE ISBI },
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Torosdagli, Neslisah; Liberton, Denise; Verma, Payal; Sincan, Murat; Lee, Janice; Pattanaik, Sumanta; Bagci, Ulas
Robust and Fully Automated Segmentation of Mandible from CT Scans Journal Article
In: IEEE ISBI 2017, 2017.
Tags: Deep Learning | Links:
@article{Torosdagli2017,
title = {Robust and Fully Automated Segmentation of Mandible from CT Scans},
author = {Neslisah Torosdagli and Denise Liberton and Payal Verma and Murat Sincan and Janice Lee and Sumanta Pattanaik and Ulas Bagci},
url = {https://www.crcv.ucf.edu/papers/1702.07059.pdf},
year = {2017},
date = {2017-04-09},
journal = { IEEE ISBI 2017},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Hou, Rui; Chen, Chen; Shah, Mubarak
An End-to-end 3D Convolutional Neural Network for Action Detection and Segmentation in Videos Conference
Cornell University Library, 2017.
Tags: Detection, Segmentation | Links:
@conference{Hou2016,
title = {An End-to-end 3D Convolutional Neural Network for Action Detection and Segmentation in Videos},
author = {Rui Hou and Chen Chen and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/ST-CNN-arXiv.pdf},
year = {2017},
date = {2017-03-31},
publisher = {Cornell University Library},
keywords = {Detection, Segmentation},
pubstate = {published},
tppubtype = {conference}
}
Tesfaye, Yonatan Tariku; Zemene, Eyasu; Prati, Andrea; Pelillo, Marcello; Shah, Mubarak
Multi-Target Tracking in Multiple Non-Overlapping Cameras using Constrained Dominant Sets Journal Article
In: Cornell University Library, 2017.
Tags: Tracking | Links:
@article{Tesfaye2017,
title = { Multi-Target Tracking in Multiple Non-Overlapping Cameras using Constrained Dominant Sets},
author = {Yonatan Tariku Tesfaye and Eyasu Zemene and Andrea Prati and Marcello Pelillo and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/1706.06196.pdf},
year = {2017},
date = {2017-03-30},
journal = {Cornell University Library},
keywords = {Tracking},
pubstate = {published},
tppubtype = {article}
}
Green, Aileen; Bagci, Ulas; Hussein, Sarfaraz; Kelly, Patrick V.; Muzaffar, Razi; Neuschwander-Tetri, Brent A.; Osman, Medhat
Brown adipose tissue detected by PET/CT imaging is associated with less central obesity Journal Article
In: Nuclear Medicine Communications, vol. 38, no. 7, pp. 629-635, July 2017.
Tags: Medical, Segmentation | Links:
@article{Greena2017,
title = {Brown adipose tissue detected by PET/CT imaging is associated with less central obesity},
author = {Aileen Green and Ulas Bagci and Sarfaraz Hussein and Patrick V. Kelly and Razi Muzaffar and Brent A. Neuschwander-Tetri and Medhat Osman},
url = {https://www.crcv.ucf.edu/papers/nmc17.pdf},
year = {2017},
date = {2017-03-29},
journal = {Nuclear Medicine Communications},
volume = {38},
number = {7},
pages = {629-635},
keywords = {Medical, Segmentation},
pubstate = {published},
tppubtype = {article}
}
Hussein, Sarfaraz; Cao, Kunlin; Song, Qi; Bagci, Ulas
Risk Stratification of Lung Nodules Using 3D CNN-Based Multi-task Learning Conference
Cornell University Library, 2017.
Tags: Medical | Links:
@conference{Hussein2017,
title = { Risk Stratification of Lung Nodules Using 3D CNN-Based Multi-task Learning},
author = {Sarfaraz Hussein and Kunlin Cao and Qi Song and Ulas Bagci},
url = {https://www.crcv.ucf.edu/papers/1704.08797v1.pd.pdf},
year = {2017},
date = {2017-03-28},
publisher = {Cornell University Library},
keywords = {Medical},
pubstate = {published},
tppubtype = {conference}
}
Mazaheri, Amir; Zhang, Dong; Shah, Mubarak
Video Fill In the Blank using LR/RL LSTMs with Spatial-Temporal Attentions Conference
Cornell University Library, 2017.
Tags: Video Description | Links:
@conference{Mazaheri2017,
title = {Video Fill In the Blank using LR/RL LSTMs with Spatial-Temporal Attentions},
author = {Amir Mazaheri and Dong Zhang and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/1704.04689v1.pd.pdf},
year = {2017},
date = {2017-03-26},
publisher = {Cornell University Library},
keywords = {Video Description},
pubstate = {published},
tppubtype = {conference}
}
Palazzo, Simone; Spampinato, Concetto; Kavasidis, Isaak; Giordano, Daniela; Shah, Mubarak
Generative Adversarial Networks Conditioned by Brain Signals Conference
IEEE International Conference on Computer Vision (ICCV), 2017.
Tags: Deep Learning, EEG/Brain | Links:
@conference{Palazzo2017,
title = {Generative Adversarial Networks Conditioned by Brain Signals},
author = {Simone Palazzo and Concetto Spampinato and Isaak Kavasidis and Daniela Giordano and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/iccv17/egpaper_for_review.pdf},
year = {2017},
date = {2017-03-25},
urldate = {2017-03-25},
publisher = {IEEE International Conference on Computer Vision (ICCV)},
keywords = {Deep Learning, EEG/Brain},
pubstate = {published},
tppubtype = {conference}
}
LaLonde, Rodney; Zhang, Dong; Shah, Mubarak
Fully Convolutional Deep Neural Networks for Persistent Multi-Frame Multi-Object Detection in Wide Area Aerial Videos Conference
Cornell University Library, 2017.
Tags: Detection | Links:
@conference{LaLonde2017,
title = { Fully Convolutional Deep Neural Networks for Persistent Multi-Frame Multi-Object Detection in Wide Area Aerial Videos},
author = {Rodney LaLonde and Dong Zhang and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/1704.02694v1.pdf},
year = {2017},
date = {2017-03-23},
publisher = {Cornell University Library},
keywords = {Detection},
pubstate = {published},
tppubtype = {conference}
}
Soomro, Khurram; Shah, Mubarak
Unsupervised Action Discovery and Localization in Videos Conference
Proceedings of the IEEE International Conference on Computer Vision (ICCV), 2017.
Tags: Deep Learning | Links:
@conference{Soomro2017,
title = {Unsupervised Action Discovery and Localization in Videos},
author = {Khurram Soomro and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/iccv17/Soomro_ICCV17.pdf},
year = {2017},
date = {2017-03-23},
publisher = {Proceedings of the IEEE International Conference on Computer Vision (ICCV)},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Hou, Rui; Chen, Chen; Shah, Mubarak
Tube Convolutional Neural Network (T-CNN) for Action Detection in Videos Conference
Cornell University Library, 2017.
Tags: Detection | Links:
@conference{Hou2017,
title = {Tube Convolutional Neural Network (T-CNN) for Action Detection in Videos},
author = {Rui Hou and Chen Chen and Mubarak Shah},
url = {https://arxiv.org/pdf/1703.10664.pdf},
year = {2017},
date = {2017-03-22},
publisher = {Cornell University Library},
keywords = {Detection},
pubstate = {published},
tppubtype = {conference}
}
Souly, Nasim; Spampinato, Concetto; Shah, Mubarak
Semi Supervised Semantic Segmentation Using Generative Adversarial Network Conference
IEEE International Conference on Computer Vision (ICCV), 2017.
Tags: Deep Learning | Links:
@conference{Souly2017b,
title = { Semi Supervised Semantic Segmentation Using Generative Adversarial Network},
author = {Nasim Souly and Concetto Spampinato and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/iccv17/GAN_Semantic_cameraReady.pdf},
year = {2017},
date = {2017-03-22},
publisher = { IEEE International Conference on Computer Vision (ICCV)},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Fan, Deng-Ping; Cheng, Ming-Ming; Liu, Yun; Li, Tao; Borji, Ali
Structure-measure: A New Way to Evaluate Foreground Maps Conference
IEEE International Conference on Computer Vision (ICCV), 2017.
Tags: Deep Learning | Links:
@conference{Fan2017,
title = { Structure-measure: A New Way to Evaluate Foreground Maps},
author = {Deng-Ping Fan and Ming-Ming Cheng and Yun Liu and Tao Li and Ali Borji},
url = {https://www.crcv.ucf.edu/papers/iccv17/1164.pdf},
year = {2017},
date = {2017-03-21},
publisher = {IEEE International Conference on Computer Vision (ICCV)},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Souly, Nasim; Spampinato, Concetto; Shah, Mubarak
Semi and Weakly Supervised Semantic Segmentation Using Generative Adversarial Network Conference
Cornell University Library, 2017.
Tags: Segmentation | Links:
@conference{Souly2017,
title = {Semi and Weakly Supervised Semantic Segmentation Using Generative Adversarial Network},
author = {Nasim Souly and Concetto Spampinato and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/1703.09695.pdf},
year = {2017},
date = {2017-03-20},
publisher = {Cornell University Library},
keywords = {Segmentation},
pubstate = {published},
tppubtype = {conference}
}
Tavakoli, Hamed R.; Shetty, Rakshith; Borji, Ali; Laaksonen, Jorma
Paying Attention to Descriptions Generated by Image Captioning Models Conference
IEEE International Conference on Computer Vision (ICCV), Venice, Italy, 2017.
Tags: Deep Learning | Links:
@conference{Tavakoli2017,
title = {Paying Attention to Descriptions Generated by Image Captioning Models},
author = {Hamed R. Tavakoli and Rakshith Shetty and Ali Borji and Jorma Laaksonen},
url = {https://www.crcv.ucf.edu/papers/iccv17/1704.07434.pdf},
year = {2017},
date = {2017-03-19},
publisher = { IEEE International Conference on Computer Vision (ICCV), Venice, Italy},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Wang, Tiantian; Borji, Ali; Zhang, Lihe; Zhang, Pingping; Lu, Huchuan
A Stagewise Refinement Model for Detecting Salient Objects in Images Conference
IEEE International Conference on Computer Vision (ICCV), Venice, Italy, 2017.
Tags: Deep Learning | Links:
@conference{Wang2017,
title = {A Stagewise Refinement Model for Detecting Salient Objects in Images},
author = {Tiantian Wang and Ali Borji and Lihe Zhang and Pingping Zhang and Huchuan Lu},
url = {https://www.crcv.ucf.edu/papers/iccv17/1709.pdf},
year = {2017},
date = {2017-03-17},
publisher = { IEEE International Conference on Computer Vision (ICCV), Venice, Italy},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Zhang, Yang; David, P.; Gong, Boqing
Curriculum Domain Adaptation for Semantic Segmentation of Urban Scenes Conference
IEEE International Conference on Computer Vision (ICCV), Venice, Italy, 2017.
Tags: Deep Learning | Links:
@conference{Zhang2017,
title = {Curriculum Domain Adaptation for Semantic Segmentation of Urban Scenes},
author = {Yang Zhang and P. David and Boqing Gong},
url = {https://www.crcv.ucf.edu/papers/iccv17/CurriculumDA.pdf
https://www.crcv.ucf.edu/papers/iccv17/VQS-Supp.pdf
https://github.com/Cold-Winter/vqs},
year = {2017},
date = {2017-03-16},
publisher = {IEEE International Conference on Computer Vision (ICCV), Venice, Italy},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Gan, C.; Li, Y.; Li, H.; Sun, C.; Gong, Boqing
VQS: Linking Segmentations to Questions and Answers for Supervised Attention in VQA and Question-Focused Semantic Segmentation Conference
IEEE International Conference on Computer Vision (ICCV), Venice, Italy, 2017.
Tags: Deep Learning | Links:
@conference{Gan2017,
title = {VQS: Linking Segmentations to Questions and Answers for Supervised Attention in VQA and Question-Focused Semantic Segmentation},
author = {C. Gan and Y. Li and H. Li and C. Sun and Boqing Gong},
url = {https://www.crcv.ucf.edu/papers/iccv17/VQS.pdf
https://www.crcv.ucf.edu/papers/iccv17/1707.09465.pd.pdf},
year = {2017},
date = {2017-03-13},
publisher = { IEEE International Conference on Computer Vision (ICCV), Venice, Italy},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Prakash, Harish Ravi; Korostenskaja, Milena; Castillo, Eduardo; Lee, Ki; Baumgartner, James; Bagci, Ulas
Automatic Response Assessment in Regions of Language Cortex in Epilepsy Patients Using ECoG-based Functional Mapping and Machine Learning Conference
IEEE SMC, 2017.
Tags: Deep Learning | Links:
@conference{Prakash2017,
title = {Automatic Response Assessment in Regions of Language Cortex in Epilepsy Patients Using ECoG-based Functional Mapping and Machine Learning},
author = {Harish Ravi Prakash and Milena Korostenskaja and Eduardo Castillo and Ki Lee and James Baumgartner and Ulas Bagci},
url = {https://www.crcv.ucf.edu/papers/1706.01380.pdf},
year = {2017},
date = {2017-03-13},
publisher = { IEEE SMC},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Mortazi, Aliasghar; Karim, Rashed; Rhode, Kawal; Burt, Jeremy; Bagci, Ulas
CardiacNET: Segmentation of Left Atrium and Proximal Pulmonary Veins from MRI Using Multi-View CNN Conference
MICCAI 2017, 2017.
Tags: Deep Learning | Links:
@conference{Mortazi2017,
title = {CardiacNET: Segmentation of Left Atrium and Proximal Pulmonary Veins from MRI Using Multi-View CNN},
author = {Aliasghar Mortazi and Rashed Karim and Kawal Rhode and Jeremy Burt and Ulas Bagci},
url = {https://www.crcv.ucf.edu/papers/1705.06333.pdf},
year = {2017},
date = {2017-03-12},
publisher = {MICCAI 2017},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Finocchiaro, Jessica; Khan, Aisha Urooj; Borji, Ali
Egocentric Height Estimation Conference
WACV, 2017.
Tags: Deep Learning | Links:
@conference{Finocchiaro2017,
title = {Egocentric Height Estimation},
author = {Jessica Finocchiaro and Aisha Urooj Khan and Ali Borji},
url = {https://www.crcv.ucf.edu/papers/1610.02714.pdf},
year = {2017},
date = {2017-03-08},
publisher = { WACV },
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Kavasidis, Isaak; Palazzo, Simone; Spampinato, Concetto; Giordano, Daniela; Shah, Mubarak
Brain2Image: Converting Brain Signals into Images Conference
ACM Multimedia 25, Mountain View, CA, 2017.
Tags: Deep Learning, EEG/Brain | Links:
@conference{kavasidiskavasidis2017,
title = {Brain2Image: Converting Brain Signals into Images},
author = {Isaak Kavasidis and Simone Palazzo and Concetto Spampinato and Daniela Giordano and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/camera_ready_acmmm_BNI08.pdf},
year = {2017},
date = {2017-03-07},
urldate = {2017-03-07},
publisher = {ACM Multimedia 25, Mountain View, CA},
keywords = {Deep Learning, EEG/Brain},
pubstate = {published},
tppubtype = {conference}
}
Tian, Yicong; Chen, Chen; Shah, Mubarak
Cross-View Image Matching for Geo-localization in Urban Environments Conference
IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2017), 2017.
Tags: Deep Learning | Links:
@conference{Tian2017b,
title = { Cross-View Image Matching for Geo-localization in Urban Environments},
author = {Yicong Tian and Chen Chen and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/cvpr2017/geolocalization_cvpr17.pdf
https://www.crcv.ucf.edu/research/cross-view-image-matching-for-geo-localization-in-urban-environments/},
year = {2017},
date = {2017-02-28},
urldate = {2017-02-28},
publisher = { IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2017)},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Kalayeh, Mahdi M.; Gong, Boqing; Shah, Mubarak
Improving Facial Attribute Prediction using Semantic Segmentation Conference
IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2017), 2017.
Tags: Deep Learning | Links:
@conference{M.Kalayeh2017,
title = {Improving Facial Attribute Prediction using Semantic Segmentation},
author = {Mahdi M. Kalayeh and Boqing Gong and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/cvpr2017/Kalayeh_CVPR2017.pdf},
year = {2017},
date = {2017-02-27},
publisher = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2017)},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Sharghi, Aidean; Laurel, J.; Gong, Boqing
Query-Focused Video Summarization: Dataset, Evaluation, and A Memory Network Based Approach Conference
IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2017), 2017.
Tags: Deep Learning | Links:
@conference{Sharghi2017,
title = {Query-Focused Video Summarization: Dataset, Evaluation, and A Memory Network Based Approach},
author = {Aidean Sharghi and J. Laurel and Boqing Gong},
url = {https://www.crcv.ucf.edu/papers/cvpr2017/Sharghi_CVPR2017.pdf
https://www.aidean-sharghi.com/cvpr2017/
https://www.crcv.ucf.edu/papers/cvpr2017/Sharghi_CVPR2017_Supp.pdf},
year = {2017},
date = {2017-02-26},
publisher = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2017)},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Tavakoli, Hamed R.; Ahmed, Fawad; Borji, Ali; Laaksonen, Jorma
Revisiting visual saliency evaluation: analysis of scores, mouse clicks, and contextual annotations Conference
IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2017), 2017.
Tags: Deep Learning | Links:
@conference{Tavakoli2017b,
title = { Revisiting visual saliency evaluation: analysis of scores, mouse clicks, and contextual annotations},
author = {Hamed R. Tavakoli and Fawad Ahmed and Ali Borji and Jorma Laaksonen},
url = {https://www.crcv.ucf.edu/papers/cvpr2017/saliencypaper.pdf},
year = {2017},
date = {2017-02-26},
publisher = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2017)},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Hou, Qibin; Cheng, Ming-Ming; Hu, Xiao-Wei; Borji, Ali; Tu, Zhuowen; Torr, Philip
Deeply supervised salient object detection with short connections Conference
IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2017), 2017.
Tags: Deep Learning | Links:
@conference{Hou2017b,
title = {Deeply supervised salient object detection with short connections},
author = {Qibin Hou and Ming-Ming Cheng and Xiao-Wei Hu and Ali Borji and Zhuowen Tu and Philip Torr},
url = {https://www.crcv.ucf.edu/papers/cvpr2017/salobjshortconnections.pdf},
year = {2017},
date = {2017-02-25},
publisher = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2017)},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Celik, Haydar; Turkbey, Baris; Choyke, Peter; Cheng, Ruida; McCreedy, Evan; McAuliffe, Matthew; Khosravan, Naji; Bagci, Ulas; Wood, Bradford
Eye Tracking System for Prostate Cancer Diagnosis Using Multi-Parametric MRI Conference
25th Annual Conference on ISMRM, 2017.
Tags: Deep Learning | Links:
@conference{Celik2017,
title = { Eye Tracking System for Prostate Cancer Diagnosis Using Multi-Parametric MRI},
author = {Haydar Celik and Baris Turkbey and Peter Choyke and Ruida Cheng and Evan McCreedy and Matthew McAuliffe and Naji Khosravan and Ulas Bagci and Bradford Wood},
url = {https://www.crcv.ucf.edu/papers/},
year = {2017},
date = {2017-02-24},
publisher = { 25th Annual Conference on ISMRM},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Mazaheri, Amir; Gong, Boqing; Shah, Mubarak
Learning a Multi-Concept Video Retrieval Model with Multiple Latent Variables Journal Article
In: IEEE International Symposium on Multimedia (ISM), IEEE, 2017.
Tags: Deep Learning | Links:
@article{Mazaheri2017b,
title = { Learning a Multi-Concept Video Retrieval Model with Multiple Latent Variables},
author = {Amir Mazaheri and Boqing Gong and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/PID4497015},
year = {2017},
date = {2017-02-24},
journal = {IEEE International Symposium on Multimedia (ISM)},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Green, Aileen; Bagci, Ulas; Hussein, Sarfaraz; Kelly, Patrick V.; Muzaffar, Razi; Neuschwander-Tetri, Brent A.; Osman, Medhat
Brown adipose tissue detected by PET/CT imaging is associated with less central obesity Journal Article
In: Nuclear Medicine Communications, 2017.
Tags: Deep Learning | Links:
@article{Greena2017c,
title = {Brown adipose tissue detected by PET/CT imaging is associated with less central obesity},
author = {Aileen Green and Ulas Bagci and Sarfaraz Hussein and Patrick V. Kelly and Razi Muzaffar and Brent A. Neuschwander-Tetri and Medhat Osman},
url = {https://www.crcv.ucf.edu/papers/nmc17.pdf},
year = {2017},
date = {2017-01-31},
journal = {Nuclear Medicine Communications},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Soomro, Khurram
Online, Supervised and Unsupervised Action Localization in Videos PhD Thesis
University of Central Florida, 2017.
Tags: Deep Learning | Links:
@phdthesis{Soomro2017b,
title = {Online, Supervised and Unsupervised Action Localization in Videos},
author = {Khurram Soomro},
url = {https://www.crcv.ucf.edu/papers/theses/Soomro.pdf},
year = {2017},
date = {2017-01-31},
school = {University of Central Florida},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {phdthesis}
}
Souly, Nasim
Visual Saliency Detection and Semantic Segmentation PhD Thesis
University of Central Florida, 2017.
Tags: Deep Learning | Links:
@phdthesis{Souly2017c,
title = {Visual Saliency Detection and Semantic Segmentation},
author = {Nasim Souly},
url = {https://www.crcv.ucf.edu/papers/theses/Souly.pdf},
year = {2017},
date = {2017-01-30},
school = {University of Central Florida},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {phdthesis}
}
Vaca-Castano, Gonzalo
Understanding Images and Videos Using Context PhD Thesis
University of Central Florida, 2017.
Tags: Deep Learning | Links:
@phdthesis{Vaca2017,
title = {Understanding Images and Videos Using Context},
author = {Gonzalo Vaca-Castano},
url = {https://www.crcv.ucf.edu/papers/theses/Vaca.pdf
https://youtu.be/SY5vZss62B0},
year = {2017},
date = {2017-01-28},
school = {University of Central Florida},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {phdthesis}
}
Sultani, Waqas
Weakly Labeled Action Recognition and Detection PhD Thesis
University of Central Florida, 2017.
Tags: Deep Learning | Links:
@phdthesis{Sultani2017b,
title = {Weakly Labeled Action Recognition and Detection},
author = {Waqas Sultani},
url = {https://www.crcv.ucf.edu/papers/theses/},
year = {2017},
date = {2017-01-27},
school = {University of Central Florida},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {phdthesis}
}
Rawat, Yogesh Singh; Song, Mingli; Kankanhalli, Mohan
A spring-electric graph model for socialized group photography Journal Article
In: IEEE Transactions on Multimedia, vol. 20, no. 3, pp. 754–766, 2017.
Tags:
@article{rawat2017spring,
title = {A spring-electric graph model for socialized group photography},
author = {Yogesh Singh Rawat and Mingli Song and Mohan Kankanhalli},
year = {2017},
date = {2017-01-01},
journal = {IEEE Transactions on Multimedia},
volume = {20},
number = {3},
pages = {754--766},
publisher = {IEEE},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2016
Hussein, Sarfaraz; Green, Aileen; Watane, Arjun; Reiter, David; Chen, Xinjian; Papadakis, Georgios Z.; Wood, Bradford; Cypess, Aaron; Osman, Medhat; Bagci, Ulas
Automatic Segmentation and Quantification of White and Brown Adipose Tissues from PET/CT Scans Journal Article
In: IEEE Transactions on Medical Imaging, 2016.
Tags: Deep Learning | Links:
@article{Hussein2016,
title = {Automatic Segmentation and Quantification of White and Brown Adipose Tissues from PET/CT Scans},
author = {Sarfaraz Hussein and Aileen Green and Arjun Watane and David Reiter and Xinjian Chen and Georgios Z. Papadakis and Bradford Wood and Aaron Cypess and Medhat Osman and Ulas Bagci},
url = {https://www.crcv.ucf.edu/papers/07775001.pdf},
year = {2016},
date = {2016-12-30},
journal = { IEEE Transactions on Medical Imaging},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Borji, Ali; Feng, Mengyang; Lu, Huchuan
Vanishing point attracts gaze in free-viewing and visual search tasks Journal Article
In: Journal of Vision, 2016.
Tags: Deep Learning | Links:
@article{Borji2016b,
title = {Vanishing point attracts gaze in free-viewing and visual search tasks},
author = {Ali Borji and Mengyang Feng and Huchuan Lu},
url = {https://www.crcv.ucf.edu/papers/i1534-7362-16-14-18.pdf},
year = {2016},
date = {2016-12-29},
journal = {Journal of Vision},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Zhou, Y; Teomete, U; Dandin, O; Osman, O; Dandinoglu, T; Bagci, Ulas; Zhao, W
Computer-Aided Detection (CADx) for Plastic Deformation Fractures in Pediatric Forearm Journal Article
In: Computers in Biology and Medicine, 2016.
Tags: Deep Learning | Links:
@article{Zhou2016,
title = {Computer-Aided Detection (CADx) for Plastic Deformation Fractures in Pediatric Forearm},
author = {Y Zhou and U Teomete and O Dandin and O Osman and T Dandinoglu and Ulas Bagci and W Zhao},
url = {https://www.crcv.ucf.edu/papers/index.php},
year = {2016},
date = {2016-12-27},
journal = {Computers in Biology and Medicine},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Papadakis, Georgios Z.; Millo, Corina; Sadowski, Samira M.; Bagci, Ulas; Patronas, Nicholas J.
Kidney Tumor in a von Hippel-Lindau (VHL) Patient With Intensely Increased Activity on 68Ga-DOTA-TATE PET/CT Journal Article
In: Clinical Nuclear Medicine, 2016.
Tags: Deep Learning | Links:
@article{Papadakis2016,
title = {Kidney Tumor in a von Hippel-Lindau (VHL) Patient With Intensely Increased Activity on 68Ga-DOTA-TATE PET/CT},
author = {Georgios Z. Papadakis and Corina Millo and Samira M. Sadowski and Ulas Bagci and Nicholas J. Patronas},
url = {https://scholar.google.com/citations?view_op=view_citation&hl=en&user=9LUdPM4AAAAJ&sortby=pubdate&citation_for_view=9LUdPM4AAAAJ:K4-iKlO5MD4C},
year = {2016},
date = {2016-12-24},
journal = {Clinical Nuclear Medicine},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Vaca-Castano, Gonzalo; Das, Samarjit; Sousa, Joao P.; da Vitoria Lobo, Niels; Shah, Mubarak
Improved scene identification and object detection on egocentric vision of daily activities Journal Article
In: Computer Vision and Image Understanding, 2016.
Tags: Deep Learning | Links:
@article{Vaca-Castano2016b,
title = {Improved scene identification and object detection on egocentric vision of daily activities},
author = {Gonzalo Vaca-Castano and Samarjit Das and Joao P. Sousa and Niels da Vitoria Lobo and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/cviu2016-gonzalo.pdf
http://dx.doi.org/10.1016/j.cviu.2016.10.016},
year = {2016},
date = {2016-12-22},
journal = {Computer Vision and Image Understanding},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Papadakis, Georgios Z.; Millo, Corina; Sadowski, Samira M.; Bagci, Ulas; Patronas, Nicholas J.
Epididymal Cystadenomas in von Hippel-Lindau Disease Showing Increased Activity on 68Ga DOTATATE PET/CT Journal Article
In: Clinical Nuclear Medicine, 2016.
Tags: Deep Learning | Links:
@article{Papadakis2016b,
title = {Epididymal Cystadenomas in von Hippel-Lindau Disease Showing Increased Activity on 68Ga DOTATATE PET/CT},
author = {Georgios Z. Papadakis and Corina Millo and Samira M. Sadowski and Ulas Bagci and Nicholas J. Patronas},
url = {https://www.crcv.ucf.edu/papers/Epididymal_Cystadenomas_in_von_Hippel_Lindau.9.pdf
http://doi.org/10.1097/RLU.0000000000001314},
year = {2016},
date = {2016-12-21},
journal = {Clinical Nuclear Medicine},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Papadakis, Georgios Z.; Millo, Corina; Sadowski, Samira M.; Bagci, Ulas; Patronas, Nicholas J.
Endolymphatic Sac Tumor Showing Increased Activity on 68Ga DOTATATE PET/CT Journal Article
In: Clinical Nuclear Medicine, 2016.
Tags: Deep Learning | Links:
@article{Papadakis2016c,
title = {Endolymphatic Sac Tumor Showing Increased Activity on 68Ga DOTATATE PET/CT},
author = {Georgios Z. Papadakis and Corina Millo and Samira M. Sadowski and Ulas Bagci and Nicholas J. Patronas},
url = {https://www.crcv.ucf.edu/papers/Endolymphatic_Sac_Tumor_Showing_Increased_Activity.10.pdf
http://doi.org/10.1097/RLU.0000000000001315},
year = {2016},
date = {2016-12-20},
journal = {Clinical Nuclear Medicine},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Johnson, Reed F.; Bagci, Ulas; Keith, Lauren; Tang, Xianchun; Mollura, Daniel J.; Zeitlin, Larry; Qin, Jing; Huzella, Louis; Bartos, Christopher J.; Bohorova, Natasha; Bohorov, Ognian; Goodman, Charles; Kim, Do H.; Paulty, Michael H.; Velasco, Jesus; Whaley, Kevin J.; Johnson, Joshua C.; Pettitt, James; Ork, Britini L.; Solomon, Jeffrey; Oberlander, Nicholas; Zhu, Quan; Sun, Jiusong; Holbrook, Michael R.; Olinger, Gene; Baric, Ralph S.; Hensley, Lisa; Jahrling, Peter; Marasco, Wayne A.
3B11-N, a monoclonal antibody against MERS-CoV, reduces lung pathology in rhesus monkeys following intratracheal inoculation of MERS-CoV Jordan-n3/2012 Journal Article
In: Virology, 2016.
Tags: Deep Learning | Links:
@article{Johnson2016,
title = {3B11-N, a monoclonal antibody against MERS-CoV, reduces lung pathology in rhesus monkeys following intratracheal inoculation of MERS-CoV Jordan-n3/2012},
author = {Reed F. Johnson and Ulas Bagci and Lauren Keith and Xianchun Tang and Daniel J. Mollura and Larry Zeitlin and Jing Qin and Louis Huzella and Christopher J. Bartos and Natasha Bohorova and Ognian Bohorov and Charles Goodman and Do H. Kim and Michael H. Paulty and Jesus Velasco and Kevin J. Whaley and Joshua C. Johnson and James Pettitt and Britini L. Ork and Jeffrey Solomon and Nicholas Oberlander and Quan Zhu and Jiusong Sun and Michael R. Holbrook and Gene Olinger and Ralph S. Baric and Lisa Hensley and Peter Jahrling and Wayne A. Marasco},
url = {https://www.crcv.ucf.edu/papers/1-s2.0-S0042682216000076-main.pdf
http://dx.doi.org/10.1016/j.virol.2016.01.004},
year = {2016},
date = {2016-12-19},
journal = {Virology},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Borji, Ali; Feng, Mengyang; Lu, Huchuan
Vanishing point attracts gaze in free-viewing and visual search tasks Journal Article
In: Journal of Vision, 2016.
Tags: Deep Learning | Links:
@article{Borji2016c,
title = {Vanishing point attracts gaze in free-viewing and visual search tasks},
author = {Ali Borji and Mengyang Feng and Huchuan Lu},
url = {https://www.crcv.ucf.edu/papers/index.php},
year = {2016},
date = {2016-12-18},
journal = {Journal of Vision},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Candemir, Sema; Jaeger, Stefan; Antani, Sameer; Bagci, Ulas; Folio, Les R.; Xu, Ziyue; Thoma, George
Atlas-based Rib-Bone Detection in Chest X-rays Journal Article
In: Computerized Medical Imaging and Graphics, 2016.
Tags: Deep Learning | Links:
@article{Candemir2016,
title = {Atlas-based Rib-Bone Detection in Chest X-rays},
author = {Sema Candemir and Stefan Jaeger and Sameer Antani and Ulas Bagci and Les R. Folio and Ziyue Xu and George Thoma},
url = {https://www.crcv.ucf.edu/papers/cmig2016.pdf},
year = {2016},
date = {2016-12-17},
journal = {Computerized Medical Imaging and Graphics},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Wang, Jingwei; Borji, Ali; Kuo, C.-C. Jay; Itti, Laurent
Learning a combined model of visual saliency for fixation prediction Journal Article
In: IEEE Transactions on Image Processing, 2016.
Tags: Deep Learning | Links:
@article{Wang2016,
title = {Learning a combined model of visual saliency for fixation prediction},
author = {Jingwei Wang and Ali Borji and C.-C. Jay Kuo and Laurent Itti},
url = {https://www.crcv.ucf.edu/papers/itip2016.pdf},
year = {2016},
date = {2016-12-15},
journal = {IEEE Transactions on Image Processing},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Johnson, Reed F.; Bagci, Ulas; Keith, Lauren; Tang, Xianchun; Mollura, Daniel J.; Zeitlin, Larry; Qin, Jing; Huzella, Louis; Bartos, Christopher J.; Bohorova, Natasha; Bohorov, Ognian; Goodman, Charles; Kim, Do H.; Paulty, Michael H.; Velasco, Jesus; Whaley, Kevin J.; Johnson, Joshua C.; Pettitt, James; Ork, Britini L.; Solomon, Jeffrey; Oberlander, Nicholas; Zhu, Quan; Sun, Jiusong; Holbrook, Michael R.; Olinger, Gene; Baric, Ralph S.; Hensley, Lisa; Jahrling, Peter; Marasco, Wayne A.
3B11-N, a monoclonal antibody against MERS-CoV, reduces lung pathology in rhesus monkeys following intratracheal inoculation of MERS-CoV Jordan-n3/2012 Journal Article
In: Virology, 2016.
Tags: Deep Learning | Links:
@article{Johnson2016b,
title = {3B11-N, a monoclonal antibody against MERS-CoV, reduces lung pathology in rhesus monkeys following intratracheal inoculation of MERS-CoV Jordan-n3/2012},
author = {Reed F. Johnson and Ulas Bagci and Lauren Keith and Xianchun Tang and Daniel J. Mollura and Larry Zeitlin and Jing Qin and Louis Huzella and Christopher J. Bartos and Natasha Bohorova and Ognian Bohorov and Charles Goodman and Do H. Kim and Michael H. Paulty and Jesus Velasco and Kevin J. Whaley and Joshua C. Johnson and James Pettitt and Britini L. Ork and Jeffrey Solomon and Nicholas Oberlander and Quan Zhu and Jiusong Sun and Michael R. Holbrook and Gene Olinger and Ralph S. Baric and Lisa Hensley and Peter Jahrling and Wayne A. Marasco},
url = {https://www.crcv.ucf.edu/papers/Virology2016.pdf},
year = {2016},
date = {2016-12-09},
journal = {Virology},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Mahalanobis, Abhijit
Pixel Resolution Improvement using a Sliding Mask (PRISM) Journal Article
In: Imaging and Applied Optics Congress, 2016.
Tags: Deep Learning
@article{Mahalanobis2016,
title = {Pixel Resolution Improvement using a Sliding Mask (PRISM)},
author = {Abhijit Mahalanobis},
year = {2016},
date = {2016-07-01},
journal = {Imaging and Applied Optics Congress},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Mahalanobis, Abhijit; Javidi, Bahram; Chen, Kenny
Integral Imaging for long range and obscured environments Proceedings
Imaging and Applied Optics Congress, 2016.
Tags: Deep Learning
@proceedings{Mahalanobis2016b,
title = {Integral Imaging for long range and obscured environments},
author = {Abhijit Mahalanobis and Bahram Javidi and Kenny Chen},
year = {2016},
date = {2016-07-01},
publisher = {Imaging and Applied Optics Congress},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {proceedings}
}
Forman, Arthur; Mahalanobis, Abhijit
Restoration of randomly sampled blurred images Proceedings
Automatic Target Recognition XXVI, 2016.
Tags: Deep Learning
@proceedings{Forman2016,
title = {Restoration of randomly sampled blurred images},
author = {Arthur Forman and Abhijit Mahalanobis},
year = {2016},
date = {2016-05-01},
publisher = {Automatic Target Recognition XXVI},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {proceedings}
}
Assari, Shayan Modiri; Idrees, Haroon; Shah, Mubarak
Human Re-identification in Crowd Videos using Personal, Social and Environmental Constraints Conference
European Conference on Computer Vision (ECCV), 2016.
Tags: Crowd Analysis, Deep Learning | Links:
@conference{Assari2016,
title = { Human Re-identification in Crowd Videos using Personal, Social and Environmental Constraints},
author = {Shayan Modiri Assari and Haroon Idrees and Mubarak Shah},
year = {2016},
date = {2016-04-14},
urldate = {2016-04-14},
publisher = {European Conference on Computer Vision (ECCV)},
keywords = {Crowd Analysis, Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Hou, Rui; Sukthankar, Rahul; Shah, Mubarak
Real-Time Temporal Action Localization in Untrimmed Videos by Sub-Action Discovery Conference
BMVC 2017, London, UK, 2016.
Tags: Deep Learning | Links:
@conference{Hou2016b,
title = {Real-Time Temporal Action Localization in Untrimmed Videos by Sub-Action Discovery},
author = {Rui Hou and Rahul Sukthankar and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/0108.pdf},
year = {2016},
date = {2016-04-12},
publisher = { BMVC 2017, London, UK},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Mahalanobis, Abhijit
Tracker-aided adaptive multi-frame recognition of a specific target Proceedings
no. 9844-39, 2016.
Tags: Deep Learning
@proceedings{Mahalanobis2016c,
title = {Tracker-aided adaptive multi-frame recognition of a specific target},
author = {Abhijit Mahalanobis},
year = {2016},
date = {2016-04-01},
number = {9844-39},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {proceedings}
}
Tian, Yicong; Shah, Mubarak
On Duality Of Multiple Target Tracking and Segmentation Conference
Cornell University Library, 2016.
Tags: Segmentation, Tracking | Links:
@conference{Tian2017,
title = {On Duality Of Multiple Target Tracking and Segmentation},
author = {Yicong Tian and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/1610.04542.pdf},
year = {2016},
date = {2016-03-18},
publisher = {Cornell University Library},
keywords = {Segmentation, Tracking},
pubstate = {published},
tppubtype = {conference}
}
Sultani, Waqas; Shah, Mubarak
Automatic Action Annotation in Weakly Labeled Videos Conference
Cornell University Library, 2016.
Tags: Classification, Detection | Links:
@conference{Sultani2017,
title = { Automatic Action Annotation in Weakly Labeled Videos},
author = {Waqas Sultani and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/CVIU_Manuscript_Waqas_AL.pdf},
year = {2016},
date = {2016-03-09},
publisher = {Cornell University Library},
keywords = {Classification, Detection},
pubstate = {published},
tppubtype = {conference}
}
Zhang, Dong; Shah, Mubarak
A Framework for Human Pose Estimation in Videos Conference
Cornell University Library, 2016.
Tags: Deep Learning | Links:
@conference{Zhang2019,
title = {A Framework for Human Pose Estimation in Videos},
author = {Dong Zhang and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/HumanPoseEstimationInVideos_Arxiv.pdf
http://arxiv.org/abs/1604.07788v1},
year = {2016},
date = {2016-03-08},
publisher = { Cornell University Library},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Idrees, Haroon; Zamir, Amir Roshan; Jiang, Yu-Gang; Gorban, Alex; Laptev, Ivan; Sukthankar, Rahul; Shah, Mubarak
The THUMOS Challenge on Action Recognition for Videos "in the Wild" Journal Article
In: Cornell University Library, 2016.
Tags: Deep Learning | Links:
@article{Idrees2016,
title = {The THUMOS Challenge on Action Recognition for Videos "in the Wild"},
author = {Haroon Idrees and Amir Roshan Zamir and Yu-Gang Jiang and Alex Gorban and Ivan Laptev and Rahul Sukthankar and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/thumosCVIU.pdf
http://arxiv.org/abs/1604.06182v1},
year = {2016},
date = {2016-03-07},
journal = {Cornell University Library},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Dehghan, Afshin; Shah, Mubarak
Quadratic Programing for Online Tracking of Hundreds of People in Extremely Crowded Scenes Conference
Cornell University Library, 2016.
Tags: Deep Learning | Links:
@conference{Dehghan2016,
title = {Quadratic Programing for Online Tracking of Hundreds of People in Extremely Crowded Scenes},
author = {Afshin Dehghan and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/1603.09240v1.pdf
http://arxiv.org/abs/1603.09240},
year = {2016},
date = {2016-03-06},
publisher = {Cornell University Library},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Castelli, Thomas; Sharghi, Aidean; Harper, Don; Tremeau, Alain; Shah, Mubarak
Autonomous navigation for low-altitude UAVs in urban areas Conference
Cornell University Library, 2016.
Tags: Deep Learning | Links:
@conference{Castelli2016,
title = {Autonomous navigation for low-altitude UAVs in urban areas},
author = {Thomas Castelli and Aidean Sharghi and Don Harper and Alain Tremeau and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/1602.08141v1.pdf
http://arxiv.org/abs/1602.08141},
year = {2016},
date = {2016-03-04},
publisher = {Cornell University Library},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Buty, Mario; Xu, Ziyue; Gao, Mingchen; Bagci, Ulas; Wu, Aaron; Mollura, Daniel J.
Characterization of Lung Nodule Malignancy using Hybrid Shape and Appearance Features Conference
MICCAI 2016, 2016.
Tags: Deep Learning | Links:
@conference{Buty2016,
title = { Characterization of Lung Nodule Malignancy using Hybrid Shape and Appearance Features},
author = {Mario Buty and Ziyue Xu and Mingchen Gao and Ulas Bagci and Aaron Wu and Daniel J. Mollura},
url = {https://www.crcv.ucf.edu/papers/index.php},
year = {2016},
date = {2016-02-25},
publisher = { MICCAI 2016},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Khosravan, Naji; Celik, Haydar; Turkbey, Baris; Cheng, Ruida; McCreedy, Evan; McAuliffe, Matthew; Bednarova, Sandra; Jones, Elizabeth; Chen, Xinjian; Choyke, Peter; Wood, Bradford; Bagci, Ulas
Gaze2Segment: A Pilot Study for Integrating Eye-Tracking Technology into Medical Image Segmentation Conference
Medical Computer Vision, 2016.
Tags: Deep Learning | Links:
@conference{Khosravan2016,
title = {Gaze2Segment: A Pilot Study for Integrating Eye-Tracking Technology into Medical Image Segmentation},
author = {Naji Khosravan and Haydar Celik and Baris Turkbey and Ruida Cheng and Evan McCreedy and Matthew McAuliffe and Sandra Bednarova and Elizabeth Jones and Xinjian Chen and Peter Choyke and Bradford Wood and Ulas Bagci},
url = {https://www.crcv.ucf.edu/papers/gaze2segment.PDF},
year = {2016},
date = {2016-02-23},
publisher = {Medical Computer Vision},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Bylinskii, Zoya; Recasens, Adria; Borji, Ali; Oliva, Aude; Torralba, Antonio; Durand, Fredo
Where should saliency models look next? Conference
European Conference on Computer Vision (ECCV), 2016.
Tags: Deep Learning | Links:
@conference{Bylinskii2016,
title = {Where should saliency models look next?},
author = {Zoya Bylinskii and Adria Recasens and Ali Borji and Aude Oliva and Antonio Torralba and Fredo Durand},
url = {https://www.crcv.ucf.edu/papers/eccv2016/BylinskiiSaliencyECCV2016.pdf},
year = {2016},
date = {2016-02-22},
publisher = { European Conference on Computer Vision (ECCV)},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Ardeshir, Shervin; Borji, Ali
Ego2Top: Matching Viewers in Egocentric and Top-view Videos Conference
European Conference on Computer Vision (ECCV), 2016.
Tags: Deep Learning | Links:
@conference{Ardeshir2016,
title = {Ego2Top: Matching Viewers in Egocentric and Top-view Videos},
author = {Shervin Ardeshir and Ali Borji},
url = {https://www.crcv.ucf.edu/papers/eccv2016/Ego2Top_v2.pdf},
year = {2016},
date = {2016-02-21},
publisher = { European Conference on Computer Vision (ECCV)},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Gan, C.; Sun, C.; Duan, L.; Gong, Boqing
Webly-Supervised Video Recognition by Mutually Voting for Relevant Web Images and Web Video Frames Conference
European Conference on Computer Vision (ECCV), 2016.
Tags: Deep Learning | Links:
@conference{Gan2016,
title = {Webly-Supervised Video Recognition by Mutually Voting for Relevant Web Images and Web Video Frames},
author = {C. Gan and C. Sun and L. Duan and Boqing Gong},
url = {https://www.crcv.ucf.edu/people/faculty/Gong/Paper/webly-supervised.pdf},
year = {2016},
date = {2016-02-20},
publisher = {European Conference on Computer Vision (ECCV)},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Sharghi, Aidean; Gong, Boqing; Shah, Mubarak
Query-Focused Extractive Video Summarization Conference
European Conference on Computer Vision (ECCV), 2016.
Tags: Deep Learning | Links:
@conference{Sharghi2016,
title = { Query-Focused Extractive Video Summarization},
author = {Aidean Sharghi and Boqing Gong and Mubarak Shah},
url = {https://www.crcv.ucf.edu/people/faculty/Gong/Paper/query-focused.pdf
https://www.crcv.ucf.edu/people/faculty/Gong/Paper/supp-query-focused.pdf},
year = {2016},
date = {2016-02-19},
publisher = { European Conference on Computer Vision (ECCV)},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Feng, Mengyang; Borji, Ali; Lu, Huchuan
Fixation prediction with a combined model of bottom-up saliency and vanishing point Conference
Proceedings of WACV, 2016.
Tags: Deep Learning | Links:
@conference{Feng2016,
title = {Fixation prediction with a combined model of bottom-up saliency and vanishing point},
author = {Mengyang Feng and Ali Borji and Huchuan Lu},
url = {https://www.crcv.ucf.edu/papers/wacv2016.pdf},
year = {2016},
date = {2016-02-19},
publisher = {Proceedings of WACV},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Chao, W-L.; Changpinyo, S.; Gong, Boqing; Sha, F.
An Empirical Study and Analysis of Generalized Zero-Shot Learning for Object Recognition in the Wild Conference
European Conference on Computer Vision (ECCV), 2016.
Tags: Deep Learning | Links:
@conference{Chao2016,
title = { An Empirical Study and Analysis of Generalized Zero-Shot Learning for Object Recognition in the Wild},
author = {W-L. Chao and S. Changpinyo and Boqing Gong and F. Sha},
url = {https://www.crcv.ucf.edu/people/faculty/Gong/Paper/generalized-0-shot.pdf
https://www.crcv.ucf.edu/people/faculty/Gong/Paper/supp-generalized-0-shot.pdf},
year = {2016},
date = {2016-02-18},
publisher = { European Conference on Computer Vision (ECCV)},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Borji, Ali; Izadi, Saeed; Itti, Laurent
iLab-20M: A large-scale controlled object dataset to investigate deep learning Conference
IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2016), 2016.
Tags: Deep Learning | Links:
@conference{Borji2016,
title = {iLab-20M: A large-scale controlled object dataset to investigate deep learning},
author = {Ali Borji and Saeed Izadi and Laurent Itti},
url = {https://www.crcv.ucf.edu/papers/cvpr2016/Borji_CVPR2016.pdf},
year = {2016},
date = {2016-02-15},
publisher = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2016)},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Changpinyo, S.; Chao, W.; Gong, Boqing; Sha, F.
Synthesized Classifiers for Zero-Shot Learning Conference
Conference on Computer Vision and Pattern Recognition (CVPR 2016), 2016.
Tags: Deep Learning | Links:
@conference{Changpinyo2016,
title = {Synthesized Classifiers for Zero-Shot Learning},
author = {S. Changpinyo and W. Chao and Boqing Gong and F. Sha},
url = {https://www.crcv.ucf.edu/people/faculty/Gong/Paper/attribute-dg.pdf
https://www.crcv.ucf.edu/people/faculty/Gong/Paper/zero-shot-supp.pdf},
year = {2016},
date = {2016-02-14},
publisher = {Conference on Computer Vision and Pattern Recognition (CVPR 2016)},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Gan, C.; Yang, T.; Gong, Boqing
Learning Attributes Equals Multi-Source Domain Generalization Conference
Conference on Computer Vision and Pattern Recognition (CVPR 2016), 2016.
Tags: Deep Learning | Links:
@conference{Gan2016b,
title = { Learning Attributes Equals Multi-Source Domain Generalization},
author = {C. Gan and T. Yang and Boqing Gong},
url = {https://www.crcv.ucf.edu/people/faculty/Gong/Paper/attribute-dg.pdf},
year = {2016},
date = {2016-02-13},
publisher = { Conference on Computer Vision and Pattern Recognition (CVPR 2016)},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Soomro, Khurram; Idrees, Haroon; Shah, Mubarak
Predicting the Where and What of actors and actions through Online Action Localization Conference
Conference on Computer Vision and Pattern Recognition (CVPR 2016), 2016.
Tags: Deep Learning | Links:
@conference{Soomro2016,
title = { Predicting the Where and What of actors and actions through Online Action Localization},
author = {Khurram Soomro and Haroon Idrees and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/cvpr2016/Soomro_CVPR2016.pdf
https://www.youtube.com/watch?v=kaWIfSKfEj0},
year = {2016},
date = {2016-02-12},
publisher = {Conference on Computer Vision and Pattern Recognition (CVPR 2016)},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Souly, Nasim; Shah, Mubarak
Scene Labeling Using Sparse Precision Matrix Conference
IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2016), 2016.
Tags: Deep Learning | Links:
@conference{Souly2016,
title = { Scene Labeling Using Sparse Precision Matrix},
author = {Nasim Souly and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/cvpr2016/cvpr2016_Souly.pdf
https://www.youtube.com/watch?v=F34eVxzS6Ng},
year = {2016},
date = {2016-02-11},
publisher = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2016)},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Sultani, Waqas; Shah, Mubarak
What if we do not have multiple videos of the same action? - Video Action Localization Using Web Images Conference
IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2016), 2016.
Tags: Deep Learning | Links:
@conference{Sultani2016,
title = { What if we do not have multiple videos of the same action? - Video Action Localization Using Web Images},
author = {Waqas Sultani and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/cvpr2016/CVPR16_Waqas_AL.pdf
https://www.youtube.com/watch?v=99FE9XOeX-k},
year = {2016},
date = {2016-02-10},
publisher = { IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2016)},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Zhang, Yang; Gong, Boqing; Shah, Mubarak
Fast Zero-Shot Image Tagging Conference
IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2016), 2016.
Tags: Deep Learning, Zero-Shot Learning | Links:
@conference{Zhang2016,
title = { Fast Zero-Shot Image Tagging},
author = {Yang Zhang and Boqing Gong and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/cvpr2016/Zhang_CVPR2016.pdf
https://youtu.be/Pmv5JHKX2y4},
year = {2016},
date = {2016-02-10},
urldate = {2016-02-10},
publisher = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2016)},
keywords = {Deep Learning, Zero-Shot Learning},
pubstate = {published},
tppubtype = {conference}
}
Zhang, Dong
Spatiotemporal Graphs for Object Segmentation and Human Pose Estimation in Videos PhD Thesis
University of Central Florida, 2016.
Tags: Deep Learning | Links:
@phdthesis{Zhang2016b,
title = {Spatiotemporal Graphs for Object Segmentation and Human Pose Estimation in Videos},
author = {Dong Zhang},
url = {https://www.crcv.ucf.edu/papers/theses/},
year = {2016},
date = {2016-01-31},
school = {University of Central Florida},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {phdthesis}
}
Dehghan, Afshin
Global Data Association for Multiple Pedestrian Tracking PhD Thesis
University of Central Florida, 2016.
Tags: Deep Learning | Links:
@phdthesis{Dehghan2016b,
title = {Global Data Association for Multiple Pedestrian Tracking},
author = {Afshin Dehghan},
url = {https://www.crcv.ucf.edu/papers/theses/Dehghan.pdf
https://youtu.be/SgRSniLdpwk},
year = {2016},
date = {2016-01-29},
school = {University of Central Florida},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {phdthesis}
}
Zhou, Y; Teomete, U; Dandin, O; Osman, O; Dandinoglu, T; Bagci, Ulas; Zhao, W
Computer-Aided Detection (CADx) for Plastic Deformation Fractures in Pediatric Forearm Journal Article
In: Computers in Biology and Medicine, 2016.
Tags: Deep Learning | Links:
@article{Zhou2016b,
title = {Computer-Aided Detection (CADx) for Plastic Deformation Fractures in Pediatric Forearm},
author = {Y Zhou and U Teomete and O Dandin and O Osman and T Dandinoglu and Ulas Bagci and W Zhao},
url = {https://www.crcv.ucf.edu/papers/index.php},
year = {2016},
date = {2016-01-24},
journal = {Computers in Biology and Medicine},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Rawat, Yogesh Singh; Kankanhalli, Mohan
ConTagNet: Exploiting user context for image tag recommendation Proceedings Article
In: Proceedings of the 24th ACM international conference on Multimedia, pp. 1102–1106, 2016.
Tags:
@inproceedings{rawat2016contagnet,
title = {ConTagNet: Exploiting user context for image tag recommendation},
author = {Yogesh Singh Rawat and Mohan Kankanhalli},
year = {2016},
date = {2016-01-01},
booktitle = {Proceedings of the 24th ACM international conference on Multimedia},
pages = {1102--1106},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Rawat, Yogesh Singh; Kankanhalli, Mohan
Clicksmart: A context-aware viewpoint recommendation system for mobile photography Journal Article
In: IEEE Transactions on Circuits and Systems for Video Technology, vol. 27, no. 1, pp. 149–158, 2016.
Tags:
@article{rawat2016clicksmart,
title = {Clicksmart: A context-aware viewpoint recommendation system for mobile photography},
author = {Yogesh Singh Rawat and Mohan Kankanhalli},
year = {2016},
date = {2016-01-01},
journal = {IEEE Transactions on Circuits and Systems for Video Technology},
volume = {27},
number = {1},
pages = {149--158},
publisher = {IEEE},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
2015
Papadakis, Georgios Z.; Millo, Corina; Bagci, Ulas; Sadowski, Samira M.; Stratakis, Constantine A.
Schmorl Nodes Can Cause Increased 68Ga DOTATATE Activity on PET/CT, Mimicking Metastasis in Patients With Neuroendocrine Malignancy Journal Article
In: Clinical Nuclear Medicine, 2015.
Tags: Deep Learning | Links:
@article{Papadakis2015,
title = {Schmorl Nodes Can Cause Increased 68Ga DOTATATE Activity on PET/CT, Mimicking Metastasis in Patients With Neuroendocrine Malignancy},
author = {Georgios Z. Papadakis and Corina Millo and Ulas Bagci and Samira M. Sadowski and Constantine A. Stratakis},
url = {http://europepmc.org/abstract/med/26562580},
year = {2015},
date = {2015-12-31},
journal = {Clinical Nuclear Medicine},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Dehghan, Afshin; Oreifej, Omar; Shah, Mubarak
Complex event recognition using constrained low-rank representation Journal Article
In: Image and Vision Computing (IVC), 2015.
Tags: Deep Learning | Links:
@article{Dehghan2015c,
title = {Complex event recognition using constrained low-rank representation},
author = {Afshin Dehghan and Omar Oreifej and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/EventRec_IVC},
year = {2015},
date = {2015-12-30},
journal = {Image and Vision Computing (IVC)},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Idrees, Haroon; Soomro, Khurram; Shah, Mubarak
Detecting Humans in Dense Crowds using Locally-Consistent Scale Prior and Global Occlusion Reasoning Journal Article
In: IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI), 2015.
Tags: Crowd Analysis, Deep Learning | Links:
@article{Idrees2015,
title = {Detecting Humans in Dense Crowds using Locally-Consistent Scale Prior and Global Occlusion Reasoning},
author = {Haroon Idrees and Khurram Soomro and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/Idrees%20et%20al.%20-%20Detecting%20Humans%20in%20Dense%20Crowds%20-%20PAMI15.pdf},
year = {2015},
date = {2015-12-29},
urldate = {2015-12-29},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)},
keywords = {Crowd Analysis, Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Souly, Nasim; Shah, Mubarak
Visual Saliency Detection Using Group Lasso Regularization in Videos of Natural Scenes Journal Article
In: International Journal of Computer Vision, 2015.
Tags: Deep Learning | Links:
@article{Souly2015,
title = {Visual Saliency Detection Using Group Lasso Regularization in Videos of Natural Scenes},
author = {Nasim Souly and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/ijcv2015.pdf},
year = {2015},
date = {2015-12-28},
journal = {International Journal of Computer Vision},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Wang, Lingshu; Shi, Wei; Joyce, M. Gordon; Modjarrad, Kayvon; Zhang, Yi; Leung, Kwanyee; Lees, Christopher R.; Zhou, Tongqing; Yassine, Hadi M.; Kanekiyo, Masaru; Yang, Zhi-yong; Chen, Xinjian; Becker, Michelle M.; Freeman, Megan; Vogel, Leatrice; Johnson, Joshua C.; Olinger, Gene; Todd, John P.; Bagci, Ulas; Solomon, Jeffrey; Mollura, Daniel J.; Hensley, Lisa; Jahrling, Peter; Denison, Mark R.; Rao, Srinivas S.; Subbarao, Kanta; Kwong, Peter D.; Mascola, John R.; Kong, Wing-Pui; Graham, Barney S.
Evaluation of Candidate Vaccine Approaches for MERS-CoV Journal Article
In: Nature Communications, 2015.
Tags: Deep Learning | Links:
@article{Becker2015,
title = {Evaluation of Candidate Vaccine Approaches for MERS-CoV},
author = {Lingshu Wang and Wei Shi and M. Gordon Joyce and Kayvon Modjarrad and Yi Zhang and Kwanyee Leung and Christopher R. Lees and Tongqing Zhou and Hadi M. Yassine and Masaru Kanekiyo and Zhi-yong Yang and Xinjian Chen and Michelle M. Becker and Megan Freeman and Leatrice Vogel and Joshua C. Johnson and Gene Olinger and John P. Todd and Ulas Bagci and Jeffrey Solomon and Daniel J. Mollura and Lisa Hensley and Peter Jahrling and Mark R. Denison and Srinivas S. Rao and Kanta Subbarao and Peter D. Kwong and John R. Mascola and Wing-Pui Kong and Barney S. Graham},
url = {https://www.crcv.ucf.edu/papers/ncomms8712.pdf
http://doi.org/10.1038/ncomms8712},
year = {2015},
date = {2015-12-27},
journal = {Nature Communications},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Camp, Jeremy V.; Bagci, Ulas; Chu, Y.; Squier, B.; Fraig, M.; Uriarte, S. M.; Guo, H.; Mollura, Daniel J.; Jonsson, Colleen B.
Lower Respiratory Tract Infection of the Ferret by 2009 H1N1 Pandemic Influenza A Virus Triggers Biphasic Systemic and Local Neutrophil Recruitment Journal Article
In: Journal of Virology, 2015.
Tags: Deep Learning | Links:
@article{Camp2015,
title = {Lower Respiratory Tract Infection of the Ferret by 2009 H1N1 Pandemic Influenza A Virus Triggers Biphasic Systemic and Local Neutrophil Recruitment},
author = {Jeremy V. Camp and Ulas Bagci and Y. Chu and B. Squier and M. Fraig and S.M. Uriarte and H. Guo and Daniel J. Mollura and Colleen B. Jonsson},
url = {https://www.crcv.ucf.edu/papers/index.php},
year = {2015},
date = {2015-12-25},
journal = {Journal of Virology},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Xu, Ziyue; Bagci, Ulas; Foster, Brent; Mansoor, Awais; Udupa, Jayaram K.; Mollura, Daniel J.
A Hybrid Method for Airway Segmentation and Automated Measurement of Bronchial Wall Thickness on CT Journal Article
In: Medical Image Analysis, 2015.
Tags: Deep Learning | Links:
@article{Xu2015b,
title = {A Hybrid Method for Airway Segmentation and Automated Measurement of Bronchial Wall Thickness on CT},
author = {Ziyue Xu and Ulas Bagci and Brent Foster and Awais Mansoor and Jayaram K. Udupa and Daniel J. Mollura},
url = {https://www.crcv.ucf.edu/papers/MIA2015.pdf},
year = {2015},
date = {2015-12-24},
journal = {Medical Image Analysis},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Chen, Kenny; Stanfill, Robert; Mahalanobis, Abhijit
Aided target recognition using hyperdimensional manifolds Proceedings
Automatic Target Recognition XXV, 2015.
Tags: Deep Learning
@proceedings{Chen2015,
title = {Aided target recognition using hyperdimensional manifolds},
author = {Kenny Chen and Robert Stanfill and Abhijit Mahalanobis},
year = {2015},
date = {2015-05-22},
publisher = {Automatic Target Recognition XXV},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {proceedings}
}
Chen, Kenny; Mahalanobis, Abhijit; Stanfill, Robert; Javidi, Bahram
Integral imaging for anti-access/area denial environments Proceedings
Three-Dimensional Imaging, Visualization, and Display, 2015.
Tags: Deep Learning
@proceedings{Chen2015b,
title = {Integral imaging for anti-access/area denial environments},
author = {Kenny Chen and Abhijit Mahalanobis and Robert Stanfill and Bahram Javidi},
year = {2015},
date = {2015-05-22},
publisher = {Three-Dimensional Imaging, Visualization, and Display},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {proceedings}
}
Veras, J.; Muise, Robert; Hines, K.; Mahalanobis, Abhijit; Neifeld, Mark
Computational imaging in a multiplexed imager with static multispectral encoding Proceedings
Compressive Sensing IV, 2015.
Tags: Deep Learning
@proceedings{Veras2015,
title = {Computational imaging in a multiplexed imager with static multispectral encoding},
author = {J. Veras and Robert Muise and K. Hines and Abhijit Mahalanobis and Mark Neifeld},
year = {2015},
date = {2015-05-14},
publisher = {Compressive Sensing IV},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {proceedings}
}
Hussein, Sarfaraz; Bagci, Ulas
Transferability of 3D CNN features for Organ Detection Conference
NIPS 2015 Workshop on Machine Learning in Healthcare, 2015.
Tags: Deep Learning
@conference{Hussein2015,
title = {Transferability of 3D CNN features for Organ Detection},
author = {Sarfaraz Hussein and Ulas Bagci},
year = {2015},
date = {2015-02-08},
publisher = {NIPS 2015 Workshop on Machine Learning in Healthcare},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Soomro, Khurram; Khokhar, Salman; Shah, Mubarak
Tracking when the camera looks away Conference
International Conference on Computer Vision Workshop (ICCVW), 2015.
Tags: Deep Learning
@conference{Soomro2015,
title = {Tracking when the camera looks away},
author = {Khurram Soomro and Salman Khokhar and Mubarak Shah},
year = {2015},
date = {2015-02-06},
publisher = {International Conference on Computer Vision Workshop (ICCVW)},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Zhang, Dong; Shah, Mubarak
Human Pose Estimation in Videos Conference
International Conference on Computer Vision (ICCV), 2015.
Tags: Deep Learning | Links:
@conference{Zhang2015,
title = {Human Pose Estimation in Videos},
author = {Dong Zhang and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/2130(CR).pdf},
year = {2015},
date = {2015-02-05},
publisher = {International Conference on Computer Vision (ICCV)},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Soomro, Khurram; Idrees, Haroon; Shah, Mubarak
Action Localization in Videos through Context Walk Conference
International Conference on Computer Vision (ICCV), 2015.
Tags: Deep Learning | Links:
@conference{Soomro2015b,
title = {Action Localization in Videos through Context Walk},
author = {Khurram Soomro and Haroon Idrees and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/actionLocal_CombinedV13.pdf},
year = {2015},
date = {2015-02-05},
publisher = {International Conference on Computer Vision (ICCV)},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Gao, Mingchen; Bagci, Ulas; Lu, Le; Wu, A.; Buty, Mario; Shin, H-C.; Roth, H.; Papadakis, Georgios Z.; Depeursinge, A.; Summers, R. M.; Xu, Ziyue; Mollura, Daniel J.
Holistic Classification of CT Attenuation Patterns for Interstitial Lung Diseases via Deep Convolutional Neural Networks Conference
2015.
Tags: Deep Learning | Links:
@conference{Gao2015,
title = {Holistic Classification of CT Attenuation Patterns for Interstitial Lung Diseases via Deep Convolutional Neural Networks},
author = {Mingchen Gao and Ulas Bagci and Le Lu and A. Wu and Mario Buty and H-C. Shin and H. Roth and Georgios Z. Papadakis and A. Depeursinge and R.M. Summers and Ziyue Xu and Daniel J. Mollura},
url = {https://www.crcv.ucf.edu/papers/miccai_cnn_workshop_final.pdf},
year = {2015},
date = {2015-02-04},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Vaca-Castano, Gonzalo; Shah, Mubarak
Semantic Image Search From Multiple Query Images Conference
ACM Multimedia Conference 2015 (ACMMM 2015), 2015.
Tags: Deep Learning | Links:
@conference{Vaca-Castano2015,
title = {Semantic Image Search From Multiple Query Images},
author = {Gonzalo Vaca-Castano and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/acmmm15/FinalV4.pdf
https://www.crcv.ucf.edu/papers/Semantic%20Image%20Search%20from%20Multiple%20Query%20Images.pptx},
year = {2015},
date = {2015-02-04},
publisher = {ACM Multimedia Conference 2015 (ACMMM 2015)},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Kalayeh, Mahdi M.; Seifu, Misrak; LaLanne, Wesna; Shah, Mubarak
How to Take a Good Selfie? Conference
ACM Multimedia Conference 2015 (ACMMM 2015), 2015.
Tags: Deep Learning, REU | Links:
@conference{Kalayeh2015,
title = {How to Take a Good Selfie?},
author = {Mahdi M. Kalayeh and Misrak Seifu and Wesna LaLanne and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/acmmm15/Selfie.pdf},
year = {2015},
date = {2015-02-03},
publisher = {ACM Multimedia Conference 2015 (ACMMM 2015)},
keywords = {Deep Learning, REU},
pubstate = {published},
tppubtype = {conference}
}
Bagci, Ulas; Papadakis, Georgios Z.; Xu, Ziyue; Green, Aileen; Osman, Medhat; Shah, Mubarak
Nuclear Medicine Meets Computer Vision: Increasing Role of Computerized Detection, Tracking, Diagnosis, and Quantification of PET/CT and PET/MRI Studies Conference
SNMMI (Society of Nuclear Medicine and Molecular Imaging) Conference 2015, 2015.
Tags: Deep Learning
@conference{Bagci2015,
title = {Nuclear Medicine Meets Computer Vision: Increasing Role of Computerized Detection, Tracking, Diagnosis, and Quantification of PET/CT and PET/MRI Studies},
author = {Ulas Bagci and Georgios Z. Papadakis and Ziyue Xu and Aileen Green and Medhat Osman and Mubarak Shah},
year = {2015},
date = {2015-02-03},
publisher = {SNMMI (Society of Nuclear Medicine and Molecular Imaging) Conference 2015},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Green, Aileen; Bagci, Ulas; Kelly, Patrick V.; Osman, Medhat
Brown adipose tissue detected by FDG PET/CT is associated with less central obesity compared to body mass index matched controls Conference
SNMMI (Society of Nuclear Medicine and Molecular Imaging) Conference 2015, 2015.
Tags: Deep Learning
@conference{Green2015,
title = {Brown adipose tissue detected by FDG PET/CT is associated with less central obesity compared to body mass index matched controls},
author = {Aileen Green and Ulas Bagci and Patrick V. Kelly and Medhat Osman},
year = {2015},
date = {2015-02-02},
publisher = {SNMMI (Society of Nuclear Medicine and Molecular Imaging) Conference 2015},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Green, Aileen; Bagci, Ulas; Kelly, Patrick V.; Osman, Medhat
Brown Adipose Tissue Detected by FDG PET/CT is Associated with Less Visceral Fat Conference
SNMMI (Society of Nuclear Medicine and Molecular Imaging) Conference 2015, 2015.
Tags: Deep Learning
@conference{Green2015b,
title = {Brown Adipose Tissue Detected by FDG PET/CT is Associated with Less Visceral Fat},
author = {Aileen Green and Ulas Bagci and Patrick V. Kelly and Medhat Osman},
year = {2015},
date = {2015-02-01},
publisher = {SNMMI (Society of Nuclear Medicine and Molecular Imaging) Conference 2015},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Dehghan, Afshin; Tian, Yicong; Torr, Philip; Shah, Mubarak
Target Identity-aware Network Flow for Online Multiple Target Tracking Conference
IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2015), 2015.
Tags: Deep Learning | Links:
@conference{Dehghan2015,
title = {Target Identity-aware Network Flow for Online Multiple Target Tracking},
author = {Afshin Dehghan and Yicong Tian and Philip Torr and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/cvpr2015/AfshinDehghan_TINF_CVPR15.pdf
https://youtu.be/4LJ7SWOWGUg},
year = {2015},
date = {2015-01-31},
publisher = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2015)},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Dehghan, Afshin; Assari, Shayan Modiri; Shah, Mubarak
GMMCP-Tracker: Globally Optimal Generalized Maximum Multi Clique Problem for Multiple Object Tracking Conference
IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2015), 2015.
Tags: Deep Learning | Links:
@conference{Dehghan2015b,
title = {GMMCP-Tracker: Globally Optimal Generalized Maximum Multi Clique Problem for Multiple Object Tracking},
author = {Afshin Dehghan and Shayan Modiri Assari and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/cvpr2015/AfshinDehghan_GMMCP_CVPR15.pdf
https://www.crcv.ucf.edu/projects/GMMCP-Tracker/
https://youtu.be/6zlnJUyILxk},
year = {2015},
date = {2015-01-30},
publisher = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2015)},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Ardeshir, Shervin; Collins-Sibley, Kofi Malcolm; Shah, Mubarak
Geo-semantic Segmentation Conference
IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2015), 2015.
Tags: Deep Learning, Geo-Localization, REU, Semantic Segmentation | Links:
@conference{Ardeshir2015,
title = {Geo-semantic Segmentation},
author = {Shervin Ardeshir and Kofi Malcolm Collins-Sibley and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/cvpr2015/Geo-semantic_Segmentation.pdf
https://www.crcv.ucf.edu/projects/Geosemantic/
https://www.youtube.com/watch?v=jf5U3gdF5yE},
year = {2015},
date = {2015-01-29},
urldate = {2015-01-29},
publisher = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR 2015)},
keywords = {Deep Learning, Geo-Localization, REU, Semantic Segmentation},
pubstate = {published},
tppubtype = {conference}
}
Xu, Ziyue; Bagci, Ulas; Gao, Mingchen; Mollura, Daniel J.
Highly Precise Partial Volume Correction for PET Images: An Iterative Approach via Shape Consistency Conference
IEEE International Symposium of Biomedical Imaging (ISBI), 2015.
Tags: Deep Learning | Links:
@conference{Z.Xu2015,
title = {Highly Precise Partial Volume Correction for PET Images: An Iterative Approach via Shape Consistency},
author = {Ziyue Xu and Ulas Bagci and Mingchen Gao and Daniel J. Mollura},
url = {https://www.crcv.ucf.edu/papers/ISBI2015_PVC.pdf},
year = {2015},
date = {2015-01-28},
publisher = {IEEE International Symposium of Biomedical Imaging (ISBI)},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Papadakis, Georgios Z.; Millo, Corina; Bagci, Ulas; Patronas, Nicholas J.; Stratakis, Constantine A.
Talc Pleurodesis with intense 18F-FDG activity but no 68Ga-DOTA-TATE activity on PET/CT Journal Article
In: Clinical Nuclear Medicine, 2015.
Tags: Deep Learning | Links:
@article{Papadakis2015b,
title = {Talc Pleurodesis with intense 18F-FDG activity but no 68Ga-DOTA-TATE activity on PET/CT},
author = {Georgios Z. Papadakis and Corina Millo and Ulas Bagci and Nicholas J. Patronas and Constantine A. Stratakis},
url = {https://www.crcv.ucf.edu/papers/index.php},
year = {2015},
date = {2015-01-14},
journal = {Clinical Nuclear Medicine},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Mansoor, Awais; Bagci, Ulas; Foster, Brent; Xu, Ziyue; Papadakis, Georgios Z.; Folio, Les R.; Udupa, Jayaram K.; Mollura, Daniel J.
Segmentation and Image Analysis of Abnormal Lungs at CT: Current Approaches, Challenges, and Future Trends Journal Article
In: Radiographics, 2015.
Tags: Deep Learning | Links:
@article{A.Mansoor2015,
title = {Segmentation and Image Analysis of Abnormal Lungs at CT: Current Approaches, Challenges, and Future Trends},
author = {Awais Mansoor and Ulas Bagci and Brent Foster and Ziyue Xu and Georgios Z. Papadakis and Les R. Folio and Jayaram K. Udupa and Daniel J. Mollura},
url = {https://www.crcv.ucf.edu/papers/radiographics},
year = {2015},
date = {2015-01-12},
journal = {Radiographics},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Papadakis, Georgios Z.; Bagci, Ulas; Sadowski, Samira M.; Patronas, Nicholas J.; Stratakis, Constantine A.
Ectopic ACTH and CRH co-secreting tumor localized by 68Ga-DOTA-TATE PET/CT Journal Article
In: Clinical Nuclear Medicine, 2015.
Tags: Deep Learning | Links:
@article{Papadakis2015c,
title = {Ectopic ACTH and CRH co-secreting tumor localized by 68Ga-DOTA-TATE PET/CT},
author = {Georgios Z. Papadakis and Ulas Bagci and Samira M. Sadowski and Nicholas J. Patronas and Constantine A. Stratakis},
url = {https://www.crcv.ucf.edu/papers/index.php},
year = {2015},
date = {2015-01-11},
journal = {Clinical Nuclear Medicine},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Kalayeh, Mahdi M; Mussmann, Stephen; Petrakova, Alla; da Vitoria Lobo, Niels; Shah, Mubarak
Understanding trajectory behavior: A motion pattern approach Journal Article
In: arXiv, 2015.
Abstract | Tags: Motion Patterns | Links:
@article{Kalayeh2015c,
title = {Understanding trajectory behavior: A motion pattern approach},
author = {Mahdi M Kalayeh and Stephen Mussmann and Alla Petrakova and Niels da Vitoria Lobo and Mubarak Shah},
url = {https://www.crcv.ucf.edu/wp-content/uploads/2018/11/1501.00614.pdf},
doi = {10.48550/arXiv.1501.00614},
year = {2015},
date = {2015-01-04},
urldate = {2015-01-04},
journal = {arXiv},
abstract = {Mining the underlying patterns in gigantic and complex data is of great importance to data analysts. In this paper, we propose a motion pattern approach to mine frequent behaviors in trajectory data. Motion patterns, defined by a set of highly similar flow vector groups in a spatial locality, have been shown to be very effective in extracting dominant motion behaviors in video sequences. Inspired by applications and properties of motion patterns, we have designed a framework that successfully solves the general task of trajectory clustering. Our proposed algorithm consists of four phases: flow vector computation, motion component extraction, motion components' reachability set creation, and motion pattern formation. For the first phase, we break down trajectories into flow vectors that indicate instantaneous movements. In the second phase, via a K-means clustering approach, we create motion components by clustering the flow vectors with respect to their location and velocity. Next, we create the motion components' reachability sets in terms of spatial proximity and motion similarity. Finally, for the fourth phase, we cluster motion components using agglomerative clustering with the weighted Jaccard distance between the motion components' signatures, a set created using path reachability. We have evaluated the effectiveness of our proposed method in an extensive set of experiments on diverse datasets. Further, we have shown how our proposed method handles difficulties in the general task of trajectory clustering that challenge the existing state-of-the-art methods.},
keywords = {Motion Patterns},
pubstate = {published},
tppubtype = {article}
}
Rawat, Yogesh Singh; Kankanhalli, Mohan
Context-aware photography learning for smart mobile devices Journal Article
In: ACM Transactions on Multimedia Computing, Communications, and Applications (TOMM), vol. 12, no. 1s, pp. 1–24, 2015.
Tags:
@article{rawat2015context,
title = {Context-aware photography learning for smart mobile devices},
author = {Yogesh Singh Rawat and Mohan Kankanhalli},
year = {2015},
date = {2015-01-01},
journal = {ACM Transactions on Multimedia Computing, Communications, and Applications (TOMM)},
volume = {12},
number = {1s},
pages = {1--24},
publisher = {ACM New York, NY, USA},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Rawat, Yogesh Singh
Real-time assistance in multimedia capture using social media Proceedings Article
In: Proceedings of the 23rd ACM international conference on Multimedia, pp. 641–644, 2015.
Tags:
@inproceedings{rawat2015real,
title = {Real-time assistance in multimedia capture using social media},
author = {Yogesh Singh Rawat},
year = {2015},
date = {2015-01-01},
booktitle = {Proceedings of the 23rd ACM international conference on Multimedia},
pages = {641--644},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
2014
Mahalanobis, Abhijit; Muise, Robert; Roy, S.
Efficient target detection using an adaptive compressive imager Journal Article
In: IEEE Transactions on Aerospace and Electronic Systems, vol. 50, no. 4, pp. 2528-2540, 2014.
Tags: Deep Learning
@article{Mahalanobis2014c,
title = {Efficient target detection using an adaptive compressive imager},
author = {Abhijit Mahalanobis and Robert Muise and S. Roy},
year = {2014},
date = {2014-10-13},
journal = {IEEE Transactions on Aerospace and Electronic Systems},
volume = {50},
number = {4},
pages = {2528-2540},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Dehghan, Afshin; Ortiz, Enrique G.; Villegas, Ruben; Shah, Mubarak
Who Do I Look Like? Determining Parent-Offspring Resemblance via Genetic Features Conference
CVPR, 2014.
Tags: REU
@conference{Dehghan2014c,
title = {Who Do I Look Like? Determining Parent-Offspring Resemblance via Genetic Features},
author = {Afshin Dehghan and Enrique G. Ortiz and Ruben Villegas and Mubarak Shah },
year = {2014},
date = {2014-06-23},
booktitle = {CVPR},
keywords = {REU},
pubstate = {published},
tppubtype = {conference}
}
Mahalanobis, Abhijit; Muise, Robert; Roy, S.
Adaptive compressive sensing for target detection Proceedings
SPIE, vol. 9090, 2014.
Tags: Deep Learning
@proceedings{Mahalanobis2014d,
title = {Adaptive compressive sensing for target detection},
author = {Abhijit Mahalanobis and Robert Muise and S. Roy},
year = {2014},
date = {2014-06-01},
volume = {9090},
publisher = {SPIE},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {proceedings}
}
Mahalanobis, Abhijit; Neifeld, Mark
Optimizing Measurements for Feature Specific Compressive Sensing Journal Article
In: Applied Optics, vol. 53, no. 26, pp. 6108-6118, 2014.
Tags: Deep Learning
@article{Mahalanobis2014b,
title = {Optimizing Measurements for Feature Specific Compressive Sensing},
author = {Abhijit Mahalanobis and Mark Neifeld},
year = {2014},
date = {2014-02-28},
journal = {Applied Optics},
volume = {53},
number = {26},
pages = {6108-6118},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Mahalanobis, Abhijit; Shilling, Robert; Murphy, Robert; Muise, R.
Recent results of medium wave infrared compressive sensing Journal Article
In: Applied Optics, vol. 53, no. 34, pp. 8060-8070, 2014.
Tags: Deep Learning
@article{Mahalanobis2014,
title = {Recent results of medium wave infrared compressive sensing},
author = {Abhijit Mahalanobis and Robert Shilling and Robert Murphy and R. Muise},
year = {2014},
date = {2014-02-12},
journal = {Applied Optics},
volume = {53},
number = {34},
pages = {8060-8070},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Shu, Guang
Human Detection, Tracking and Segmentation in Surveillance Video PhD Thesis
University of Central Florida, 2014.
Tags: Deep Learning | Links:
@phdthesis{Shu2014,
title = {Human Detection, Tracking and Segmentation in Surveillance Video},
author = {Guang Shu},
url = {https://www.crcv.ucf.edu/papers/theses/Shu.pdf},
year = {2014},
date = {2014-01-31},
school = {University of Central Florida},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {phdthesis}
}
Zamir, Amir Roshan; Shah, Mubarak
Image Geo-localization Based on Multiple Nearest Neighbor Feature Matching Using Generalized Graphs Journal Article
In: IEEE Transactions on Pattern Analysis and Machine Intelligence, 2014.
Tags: Deep Learning, Geo-Localization, Video Geo-localization | Links:
@article{Zamir2014b,
title = {Image Geo-localization Based on Multiple Nearest Neighbor Feature Matching Using Generalized Graphs},
author = {Amir Roshan Zamir and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/PAMI_Amir%20Zamir.pdf},
year = {2014},
date = {2014-01-29},
urldate = {2014-01-29},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
keywords = {Deep Learning, Geo-Localization, Video Geo-localization},
pubstate = {published},
tppubtype = {article}
}
Dey, Soumyabrata; Rao, A. Ravishankar; Shah, Mubarak
Attributed graph distance measure for automatic detection of Attention Deficit Hyperactive Disordered subjects Journal Article
In: Frontiers in Neural Circuits, 2014.
Tags: Deep Learning | Links:
@article{Dey2014,
title = {Attributed graph distance measure for automatic detection of Attention Deficit Hyperactive Disordered subjects},
author = {Soumyabrata Dey and A. Ravishankar Rao and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/fncir-08-00064.pdf},
year = {2014},
date = {2014-01-29},
journal = {Frontiers in Neural Circuits},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Hou, Rui; Zamir, Amir Roshan; Sukthankar, Rahul; Shah, Mubarak
DaMN - Discriminative and Mutually Nearest: Exploiting Pairwise Category Proximity for Video Action Recognition Conference
ECCV 2014, 2014.
Tags: Deep Learning | Links:
@conference{Hou2014,
title = {DaMN - Discriminative and Mutually Nearest: Exploiting Pairwise Category Proximity for Video Action Recognition},
author = {Rui Hou and Amir Roshan Zamir and Rahul Sukthankar and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/DaMN_ECCV14.pdf},
year = {2014},
date = {2014-01-29},
publisher = {ECCV 2014},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Idrees, Haroon
Visual Analysis of Extremely Dense Crowded Scenes PhD Thesis
University of Central Florida, 2014.
Tags: Deep Learning | Links:
@phdthesis{Idrees2014b,
title = {Visual Analysis of Extremely Dense Crowded Scenes},
author = {Haroon Idrees},
url = {https://www.crcv.ucf.edu/papers/theses/Idrees.pdf},
year = {2014},
date = {2014-01-28},
school = {University of Central Florida},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {phdthesis}
}
Dey, Soumyabrata
Automatic Detection of Brain Functional Disorder Using Imaging Data PhD Thesis
University of Central Florida, 2014.
Tags: Deep Learning | Links:
@phdthesis{Dey2014b,
title = {Automatic Detection of Brain Functional Disorder Using Imaging Data},
author = {Soumyabrata Dey},
url = {https://www.crcv.ucf.edu/papers/theses/Dey.pdf},
year = {2014},
date = {2014-01-28},
school = {University of Central Florida},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {phdthesis}
}
Smeulders, Arnold W. M.; Chu, Dung M.; Cucchiara, Rita; Calderara, Simone; Dehghan, Afshin; Shah, Mubarak
Visual Tracking: an Experimental Survey Journal Article
In: IEEE Transactions on Pattern Analysis and Machine Intelligence, 2014.
Tags: Deep Learning | Links:
@article{Smeulders2014,
title = {Visual Tracking: an Experimental Survey},
author = {Arnold W. M. Smeulders and Dung M. Chu and Rita Cucchiara and Simone Calderara and Afshin Dehghan and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/Tracking_Survey.pdf},
year = {2014},
date = {2014-01-27},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Borzeshi, Ehsan Zare; Dehghan, Afshin; Piccardi, Massimo; Shah, Mubarak
Complex Event Recognition by Latent Temporal Models of Concepts Conference
ICIP 2014, 2014.
Tags: Deep Learning | Links:
@conference{Borzeshi2015b,
title = {Complex Event Recognition by Latent Temporal Models of Concepts},
author = {Ehsan Zare Borzeshi and Afshin Dehghan and Massimo Piccardi and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/LSSVM_HCRF.pdf},
year = {2014},
date = {2014-01-27},
publisher = {ICIP 2014},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Bhattacharya, Subhabrata; Mehran, Ramin; Sukthankar, Rahul; Shah, Mubarak
Classification of Cinematographic Shots using Lie Algebra and its Application to Complex Event Recognition Journal Article
In: IEEE Transactions on Multimedia, 2014.
Tags: Deep Learning | Links:
@article{Bhattacharya2014b,
title = {Classification of Cinematographic Shots using Lie Algebra and its Application to Complex Event Recognition},
author = {Subhabrata Bhattacharya and Ramin Mehran and Rahul Sukthankar and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/tmm2012cam-v6.pdf
http://dx.doi.org/10.1109/TMM.2014.2300833},
year = {2014},
date = {2014-01-26},
journal = {IEEE Transactions on Multimedia},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Yang, Yang; Shah, Mubarak
Learning Discriminative Features and Metrics for Measuring Action Similarity Conference
ICIP 2014, 2014.
Tags: Deep Learning | Links:
@conference{Yang2015,
title = {Learning Discriminative Features and Metrics for Measuring Action Similarity},
author = {Yang Yang and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/ICIP2014_CameraReady_Yang.pdf},
year = {2014},
date = {2014-01-26},
publisher = {ICIP 2014},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Idrees, Haroon; Warner, Nolan; Shah, Mubarak
Tracking in dense crowds using prominence and neighborhood motion concurrence Journal Article
In: Image and Vision Computing, 2014.
Tags: Crowd Analysis, Deep Learning | Links:
@article{Idrees2014,
title = {Tracking in dense crowds using prominence and neighborhood motion concurrence},
author = {Haroon Idrees and Nolan Warner and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/CrowdTracking_IVC2013.pdf
http://dx.doi.org/10.1016/j.imavis.2013.10.006},
year = {2014},
date = {2014-01-26},
urldate = {2014-01-26},
journal = {Image and Vision Computing},
keywords = {Crowd Analysis, Deep Learning},
pubstate = {published},
tppubtype = {article}
}
Zamir, Amir Roshan; Ardeshir, Shervin; Shah, Mubarak
GPS-Tag Refinement using Random Walks with an Adaptive Damping Factor Conference
CVPR 2014, 2014.
Tags: Deep Learning, Geo-Localization | Links:
@conference{Zamir2014,
title = {GPS-Tag Refinement using Random Walks with an Adaptive Damping Factor},
author = {Amir Roshan Zamir and Shervin Ardeshir and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/cvpr2014/TagRefinement_CVPR14_Camready_v5.pdf
https://www.crcv.ucf.edu/papers/cvpr2014/video-spotlight-514.mp4
http://www.youtube.com/watch?v=0qbKQHJu-ow&list=PLd3hlSJsX_InWZEJNJG_yoGs-zy-9D-J7&feature=share},
year = {2014},
date = {2014-01-26},
urldate = {2014-01-26},
publisher = {CVPR 2014},
keywords = {Deep Learning, Geo-Localization},
pubstate = {published},
tppubtype = {conference}
}
Bhattacharya, Subhabrata; Kalayeh, Mahdi M.; Sukthankar, Rahul; Shah, Mubarak
Recognition of Complex Events exploiting Temporal Dynamics between Underlying Concepts Conference
CVPR 2014, 2014.
Tags: Deep Learning | Links:
@conference{Bhattacharya2014,
title = {Recognition of Complex Events exploiting Temporal Dynamics between Underlying Concepts},
author = {Subhabrata Bhattacharya and Mahdi M. Kalayeh and Rahul Sukthankar and Mubarak Shah},
url = {https://www.crcv.ucf.edu/papers/cvpr2014/CVPR14-CTR.pdf
http://techtalks.tv/talks/recognition-of-complex-events-exploiting-temporal-dynamics-between-underlying-concepts/60305/},
year = {2014},
date = {2014-01-26},
publisher = {CVPR 2014},
keywords = {Deep Learning},
pubstate = {published},
tppubtype = {conference}
}
Ardeshir, Shervin; Zamir, Amir Roshan; Torroella, Alejandro; Shah, Mubarak
GIS-Assisted Object Detection and Geospatial Localization Conference
ECCV 2014, 2014.
Tags: Deep Learning,