Skip to content

Latest commit

 

History

History
32 lines (32 loc) · 24.1 KB

machine_learning_based_papers.md

File metadata and controls

32 lines (32 loc) · 24.1 KB

A bibliography of papers that apply machine learning to analyze Ethereum smart contracts.

Title Year Venue Citation Abstract
Smart Contract Vulnerability Detection Based on Hybrid Attention Mechanism 2023 Appl. Sci.
cite/
@article{app13020770,
  author         = {Wu, Huaiguang and Dong, Hanjie and He, Yaqiong and Duan, Qianheng},
  title          = {Smart Contract Vulnerability Detection Based on Hybrid Attention Mechanism Model},
  journal        = {Applied Sciences},
  volume         = {13},
  number         = {2},
  pages          = {770},
  article-number = {770},
  year           = {2023},
  issn           = {2076-3417},
  doi            = {10.3390/app13020770},
  url            = {https://www.mdpi.com/2076-3417/13/2/770},
  abstract       = {A smart contract, as an important part of blockchain technology, has attracted considerable interest from both industry and academia. It provides the basis for the realization of a variety of practical blockchain applications and plays a crucial role in the blockchain ecosystem. While it also holds a large number of digital assets, the frequent occurrence of smart contract vulnerabilities have caused huge economic losses and destroyed the blockchain-based credit system. Currently, the security and reliability of smart contracts have become a new focus of research, and there are a number of smart contract vulnerability detection methods, such as traditional detection tools based on static or dynamic analysis. However, most of them often rely on expert rules, and therefore have poor scalability and high false negative and false positive rates. Recent deep learning methods alleviate this issue, but without considering the semantic information and context of source code. To this end, we propose a hybrid attention mechanism (HAM) model to detect security vulnerabilities in smart contracts. We extract code fragments from the source code, which focus on key points of vulnerability. We conduct extensive experiments on two public smart contract datasets (a total of 24,957 contracts). Empirical results show remarkable accuracy improvement over the state-of-the art methods on five kinds of vulnerabilities, where the detection accuracy could achieve 93.36\%, 80.85\%, 82.56\%, 85.62\%, and 82.19\% for reentrancy, arithmetic vulnerability, unchecked return value, timestamp dependency, and tx.origin, respectively.},
}


abstract 
Self-Supervised Learning of Smart Contract Representations 2022 ICPC
cite 
abstract 
Analyzing Transaction Confirmation in Ethereum Using Machine Learning Techniques 2021 SOCCA
cite@article{10.1145/3466826.3466832,
  author     = {Oliveira, Vinicius C. and Almeida Valadares, Julia and A. Sousa, Jose Eduardo and Borges Vieira, Alex and Bernardino, Heder Soares and Moraes Villela, Saulo and Dias Goncalves, Glauber},
  title      = {Analyzing Transaction Confirmation in {Ethereum} Using Machine Learning Techniques},
  journal    = {SIGMETRICS Perform. Eval. Rev.},
  volume     = {48},
  number     = {4},
  pages      = {12--15},
  numpages   = {4},
  year       = {2021},
  month      = may,
  issue_date = {March 2021},
  publisher  = {Association for Computing Machinery},
  address    = {New York, NY, USA},
  issn       = {0163-5999},
  doi        = {10.1145/3466826.3466832},
  url        = {https://doi.org/10.1145/3466826.3466832},
  keywords   = {blockchain, machine learning, transaction, ethereum},
}
abstractEthereum has emerged as one of the most important cryptocurrencies in terms of the number of transactions. Given the recent growth of Ethereum, the cryptocurrency community and researchers are interested in understanding the Ethereum transactions behavior. In this work, we investigate a key aspect of Ethereum: the prediction of a transaction confirmation or failure based on its features. This is a challenging issue due to the small, but still relevant, fraction of failures in millions of recorded transactions and the complexity of the distributed mechanism to execute transactions in Ethereum. To conduct this investigation, we train machine learning models for this prediction, taking into consideration carefully balanced sets of confirmed and failed transactions. The results show high-performance models for classification of transactions with the best values of F1-score and area under the ROC curve approximately equal to 0.67 and 0.87, respectively. Also, we identified the gas used as the most relevant feature for the prediction.
Combining Graph Neural Networks with Expert Knowledge for Smart Contract 2021 TKDE
cite
abstract
ContractWard: Automated Vulnerability Detection Models for Ethereum Smart 2021 NSE
cite
abstract
DeeSCVHunter: A Deep Learning-Based Framework for Smart Contract Vulnerability 2021 IJCNN
cite
abstract
Dynamic Vulnerability Detection on Smart Contracts Using Machine Learning 2021 EASE
cite@inproceedings{10.1145/3463274.3463348,
  author    = {Eshghie, Mojtaba and Artho, Cyrille and Gurov, Dilian},
  title     = {Dynamic Vulnerability Detection on Smart Contracts Using Machine Learning},
  booktitle = {Evaluation and Assessment in Software Engineering},
  series    = {EASE 2021},
  location  = {Trondheim, Norway},
  pages     = {305--312},
  numpages  = {8},
  year      = {2021},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  isbn      = {9781450390538},
  doi       = {10.1145/3463274.3463348},
  url       = {https://doi.org/10.1145/3463274.3463348},
  keywords  = {Vulnerability Detection, Ethereum, Blockchain, Smart Contracts, Machine Learning for Dynamic Software Analysis},
}
abstract In this work we propose Dynamit, a monitoring framework to detect reentrancy vulnerabilities in Ethereum smart contracts. The novelty of our framework is that it relies only on transaction metadata and balance data from the blockchain system; our approach requires no domain knowledge, code instrumentation, or special execution environment. Dynamit extracts features from transaction data and uses a machine learning model to classify transactions as benign or harmful. Therefore, not only can we find the contracts that are vulnerable to reentrancy attacks, but we also get an execution trace that reproduces the attack. Using a random forest classifier, our model achieved more than 90 percent accuracy on 105 transactions, showing the potential of our technique.
Hunting Vulnerable Smart Contracts via Graph Embedding Based Bytecode Matching 2021 TIFS
cite
abstract
Learning to Explore Paths for Symbolic Execution 2021 CCS
cite@inproceedings{10.1145/3460120.3484813,
  author    = {He, Jingxuan and Sivanrupan, Gishor and Tsankov, Petar and Vechev, Martin},
  title     = {Learning to Explore Paths for Symbolic Execution},
  booktitle = {Proceedings of the 2021 ACM SIGSAC Conference on Computer and Communications Security},
  series    = {CCS '21},
  location  = {Virtual Event, Republic of Korea},
  pages     = {2526--2540},
  numpages  = {15},
  year      = {2021},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  isbn      = {9781450384544},
  doi       = {10.1145/3460120.3484813},
  url       = {https://doi.org/10.1145/3460120.3484813},
  keywords  = {program testing, machine learning, symbolic execution, fuzzing},
}
abstractSymbolic execution is a powerful technique that can generate tests steering program execution into desired paths. However, the scalability of symbolic execution is often limited by path explosion, i.e., the number of symbolic states representing the paths under exploration quickly explodes as execution goes on. Therefore, the effectiveness of symbolic execution engines hinges on the ability to select and explore the right symbolic states. In this work, we propose a novel learning-based strategy, called Learch, able to effectively select promising states for symbolic execution to tackle the path explosion problem. Learch directly estimates the contribution of each state towards the goal of maximizing coverage within a time budget, as opposed to relying on manually crafted heuristics based on simple statistics as a crude proxy for the objective. Moreover, Learch leverages existing heuristics in training data generation and feature extraction, and can thus benefit from any new expert-designed heuristics. We instantiated Learch in KLEE, a widely adopted symbolic execution engine. We evaluated Learch on a diverse set of programs, showing that Learch is practically effective: it covers more code and detects more security violations than existing manual heuristics, as well as combinations of those heuristics. We also show that using tests generated by Learch as initial fuzzing seeds enables the popular fuzzer AFL to find more paths and security violations.
SmarTest: Effectively Hunting Vulnerable Transaction Sequences in Smart 2021 USENIX
cite/@inproceedings{272310,
  author    = {So, Sunbeom and Hong, Seongjoon and Oh, Hakjoo},
  title     = {{SmarTest}: Effectively Hunting Vulnerable Transaction Sequences in Smart Contracts through Language Model-Guided Symbolic Execution},
  booktitle = {30th USENIX Security Symposium (USENIX Security 21)},
  pages     = {1361--1378},
  year      = {2021},
  month     = aug,
  publisher = {USENIX Association},
  isbn      = {978-1-939133-24-3},
  url       = {https://www.usenix.org/conference/usenixsecurity21/presentation/so},
}
abstractWe present SmarTest, a novel symbolic execution technique for effectively hunting vulnerable transaction sequences in smart contracts. Because smart contracts are stateful programs whose states are altered by transactions, diagnosing and understanding nontrivial vulnerabilities requires generating sequences of transactions that demonstrate the flaws. However, finding such vulnerable transaction sequences is challenging as the number of possible combinations of transactions is intractably large. As a result, most existing tools for smart contract analysis use abstractions and merely point out the locations of vulnerabilities, which in turn imposes a steep burden on users of understanding the bugs, or have limited power in generating transaction sequences. In this paper, we aim to overcome this challenge by combining symbolic execution with a language model for vulnerable transaction sequences, so that symbolic execution effectively prioritizes program paths that are likely to reveal vulnerabilities. Experimental results with real-world smart contracts show that SmarTest significantly outperforms existing tools by finding more vulnerable transaction sequences including critical zero-day vulnerabilities.
Smart-Graph: Graphical Representations for Smart Contract on the Ethereum 2021 SANER
cite
abstract
SmartGift: Learning to Generate Practical Inputs for Testing Smart Contracts 2021 ICSME
cite
abstract
TokenCheck: Towards Deep Learning Based Security Vulnerability Detection 2021 TENSY
cite
abstract
VSCL: Automating Vulnerability Detection in Smart Contracts with Deep Learning 2021 ICBC
cite
abstract
VSCL: Automating Vulnerability Detection in Smart Contracts with Deep Learning 2021 ICBC
cite@inproceedings{9461050,
  author    = {Mi, Feng and Wang, Zhuoyi and Zhao, Chen and Guo, Jinghui and Ahmed, Fawaz and Khan, Latifur},
  title     = {{VSCL}: Automating Vulnerability Detection in Smart Contracts with Deep Learning},
  booktitle = {2021 IEEE International Conference on Blockchain and Cryptocurrency (ICBC)},
  pages     = {1--9},
  year      = {2021},
  doi       = {10.1109/ICBC51069.2021.9461050},
}
abstractWith the increase of the adoption of blockchain technology in providing decentralized solutions to various problems, smart contracts have become more popular to the point that billions of US Dollars are currently exchanged every day through such technology. Meanwhile, various vulnerabilities in smart contracts have been exploited by attackers to steal cryptocurrencies worth millions of dollars. The automatic detection of smart contract vulnerabilities therefore is an essential research problem. Existing solutions to this problem particularly rely on human experts to define features or different rules to detect vulnerabilities. However, this often causes many vulnerabilities to be ignored, and they are inefficient in detecting new vulnerabilities. In this study, to overcome such challenges, we propose the VSCL framework to automatically detect vulnerabilities in smart contracts on the blockchain. More specifically, first, we utilize novel feature vector generation techniques from bytecode of smart contract since the source code of smart contracts are rarely available in public. Next, the collected vectors are fed into our novel metric learning-based deep neural network(DNN) to get the detection result. We conduct comprehensive experiments on a large-scale benchmark, and the quantitative results demonstrate the effectiveness and efficiency of our approach.
Checking Smart Contracts with Structural Code Embedding 2020 TSE
cite
abstract
Learning Markets: An AI Collaboration Framework Based on Blockchain and 2020 JIOT
cite@article{9234516,
  author  = {Ouyang, Liwei and Yuan, Yong and Wang, Fei-Yue},
  title   = {Learning Markets: An {AI} Collaboration Framework Based on Blockchain and Smart Contracts},
  journal = {IEEE Internet of Things Journal},
  pages   = {1--1},
  year    = {2020},
  doi     = {10.1109/JIOT.2020.3032706},
}
abstractArtificial intelligence (AI) has been witnessed to provide valuable solutions to all walks of life. However, data island and computing resources limitations in the centralized AI architectures have increased their technical barriers, and thus distributed AI collaboration in data, models and resources has attracted intensive research interests. Since the existing trust-based collaboration models are no longer applicable for the large-scale distributed collaboration among trustless machines in open and dynamic environments, this paper proposes a novel decentralized AI collaboration framework, i.e., Learning Markets (LM), in which blockchain provides a trustless environment for collaboration and transaction, while smart contracts serve as software-defined agents to encapsulate and process scalable collaboration relationships and market mechanisms. LM can not only help those participants without mutual trust realize collaborative mining with dynamic and quantitative rewards, but also build an AI market with natural auditability and traceability for trading trusted and verified models. We implement and comprehensively analyze LM based on the Ethereum and IPFS platform, and the results prove that it has advantages in collaboration fairness, transparency, security, decentralization and universality. Based on our collaboration framework, distributed AI contributors are expected to cooperate and complete those learning tasks that cannot be done previously due to lack of complete data, sufficient computing resources and state-of-the-art models.
Machine Learning Based Bug Prediction Engine For Smart Contracts 2020 UYMS
cite@inproceedings{9247056,
  author    = {G{\"u}l, Ahmet and K{\"o}ro{\u{g}}lu, Yavuz and {\c{S}}en, Alper},
  title     = {Machine Learning Based Bug Prediction Engine For Smart Contracts},
  booktitle = {2020 Turkish National Software Engineering Symposium (UYMS)},
  pages     = {1--6},
  year      = {2020},
  doi       = {10.1109/UYMS50627.2020.9247056},
}
abstractAs blockchain solutions become widespread, identifying potential bugs in smart contracts written in Solidity language will be important for these solutions to work correctly. To accurately detect these bugs, the developer must use several state-of-the-art bug detection tools and investigate the potential bugs they report. In this study, we first show that one tool is not enough to detect all the bugs as our Static Analysis for Solidity tool (SA-Solidity) and the known SmartCheck and Securify tools identify different bugs in SmartEmbed's experimental set of smart contracts. Then, we develop Machine Learning-based Bug Predictor for Solidity (MLBP-Solidity) which predicts files that would be reported by all the previous bug detection tools. MLBP-Solidity eases the burden on the developer by allowing him/her to focus on a subset of files that are most probably buggy. Our experimental results show that MLBP-Solidity achieves 91-99% accuracy, depending on the type of predicted bug.
SCScan: A SVM-Based Scanning System for Vulnerabilities in Blockchain Smart 2020 TrustCom
cite
abstract
Smart Contract Vulnerability Detection Using Graph Neural Networks 2020 IJCAI
cite 
abstract 
Smart Contracts Vulnerability Auditing with Multi-semantics 2020 COMPSAC
cite
abstract
Towards Automated Reentrancy Detection for Smart Contracts Based on Sequential 2020 IEEE Access
cite
abstract
When Deep Learning Meets Smart Contracts 2020 ASE
cite
abstract
A Novel Neural Source Code Representation Based on Abstract Syntax Tree 2019 ICSE
cite/@inproceedings{8812062,
  author    = {Zhang, Jian and Wang, Xu and Zhang, Hongyu and Sun, Hailong and Wang, Kaixuan and Liu, Xudong},
  title     = {A Novel Neural Source Code Representation Based on Abstract Syntax Tree},
  booktitle = {2019 IEEE/ACM 41st International Conference on Software Engineering (ICSE)},
  pages     = {783--794},
  year      = {2019},
  doi       = {10.1109/ICSE.2019.00086},
}
abstract 
Learning to Fuzz from Symbolic Execution with Application to Smart Contracts 2019 CCS
cite@inproceedings{10.1145/3319535.3363230,
  author    = {He, Jingxuan and Balunovi{\'c}, Mislav and Ambroladze, Nodar and Tsankov, Petar and Vechev, Martin},
  title     = {Learning to Fuzz from Symbolic Execution with Application to Smart Contracts},
  booktitle = {Proceedings of the 2019 ACM SIGSAC Conference on Computer and Communications Security},
  series    = {CCS '19},
  location  = {London, United Kingdom},
  pages     = {531--548},
  numpages  = {18},
  year      = {2019},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  isbn      = {9781450367479},
  doi       = {10.1145/3319535.3363230},
  url       = {https://doi.org/10.1145/3319535.3363230},
  keywords  = {symbolic execution, smart contracts, imitation learning, fuzzing},
}
abstractFuzzing and symbolic execution are two complementary techniques for discovering software vulnerabilities. Fuzzing is fast and scalable, but can be ineffective when it fails to randomly select the right inputs. Symbolic execution is thorough but slow and often does not scale to deep program paths with complex path conditions. In this work, we propose to learn an effective and fast fuzzer from symbolic execution, by phrasing the learning task in the framework of imitation learning. During learning, a symbolic execution expert generates a large number of quality inputs improving coverage on thousands of programs. Then, a fuzzing policy, represented with a suitable architecture of neural networks, is trained on the generated dataset. The learned policy can then be used to fuzz new programs. We instantiate our approach to the problem of fuzzing smart contracts, a domain where contracts often implement similar functionality (facilitating learning) and security is of utmost importance. We present an end-to-end system, ILF (for Imitation Learning based Fuzzer), and an extensive evaluation over >18K contracts. Our results show that ILF is effective: (i) it is fast, generating 148 transactions per second, (ii) it outperforms existing fuzzers (e.g., achieving 33% more coverage), and (iii) it detects more vulnerabilities than existing fuzzing and symbolic execution tools for Ethereum.
Machine Learning Model for Smart Contracts Security Analysis 2019 PST
cite@inproceedings{8949045,
  author    = {Momeni, Pouyan and Wang, Yu and Samavi, Reza},
  title     = {Machine Learning Model for Smart Contracts Security Analysis},
  booktitle = {2019 17th International Conference on Privacy, Security and Trust (PST)},
  pages     = {1--6},
  year      = {2019},
  doi       = {10.1109/PST47121.2019.8949045},
}
abstractIn this paper, we introduce a machine learning predictive model that detects patterns of security vulnerabilities in smart contracts. We adapted two static code analyzers to label more than 1000 smart contracts that were verified and used on the Ethereum platform. Our model predicted a number of major software vulnerabilities with the average accuracy of 95 percent. The model currently supports smart contracts developed in Solidity, however, the approach described in this paper can be applied to other languages and blockchain platforms.
Multi-Modal Attention Network Learning for Semantic Source Code Retrieval 2019 ASE
cite 
abstract 
Attention Is All You Need 2017 NIPS
cite 
abstract