#-#
#-# Command line
#-#
#-# Namenode
Command Description
hadoop namenode -format Format HDFS filesystem from Namenode.
hadoop namenode -upgrade Upgrade the NameNode.
start-dfs.sh Start HDFS Daemons.
stop-dfs.sh Stop HDFS Daemons.
start-mapred.sh Start MapReduce Daemons.
stop-mapred.sh Stop MapReduce Daemons.
hadoop namenode -recover -force Recover namenode metadata after a cluster failure (may lose data).
#-# Health
Command Description
hadoop fsck / Filesystem check on HDFS.
hadoop fsck / -files Display files during check.
hadoop fsck / -files -blocks Display files and blocks during check.
hadoop fsck / -files -blocks -locations Display files, blocks and their locations during check.
hadoop fsck / -files -blocks -locations -racks Display network topology for data-node locations.
hadoop fsck / -delete Delete corrupted files.
hadoop fsck / -move Move corrupted files to /lost+found directory.
#-# Jobs
Command Description
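hadoop job -submit <job-file> Submit the job.
hadoop job -status <job-id> Print the map and reduce completion percentage and all job counters.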
hadoop job -list all List all jobs.
hadoop job -list-active-trackers List all available TaskTrackers.
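hadoop job -set-priority <job-id> <priority> Change the priority of the job (VERY_HIGH, HIGH, NORMAL, LOW, VERY_LOW).
hadoop job -kill-task <task-id> Kill the task. Killed tasks are not counted against failed attempts.
hadoop job -history <jobOutputDir> Display job history including job details, failed and killed jobs.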
#-# Distributed FileSystem admin
Command Description
hadoop dfsadmin -report Report filesystem info and statistics.
hadoop dfsadmin -metasave file.txt Save namenode’s primary data structures to file.txt.
hadoop dfsadmin -setQuota 10 /quotatest Set the name quota on directory /quotatest to 10 names (files and directories).
hadoop dfsadmin -clrQuota /quotatest Clear Hadoop directory quota.
hadoop dfsadmin -refreshNodes Read hosts and exclude files to update datanodes.
hadoop fs -count -q /mydir Check quota space on directory /mydir.
hadoop dfsadmin -setSpaceQuota 100M /mydir Set space quota to 100M on HDFS directory named /mydir.
hadoop dfsadmin -clrSpaceQuota /mydir Clear the space quota on an HDFS directory.
hadoop dfsadmin -saveNamespace Backup Metadata (fsimage & edits). Put cluster in safe mode before this command.
#-# Safe mode
Command Description
hadoop dfsadmin -safemode enter Enter safe mode.
hadoop dfsadmin -safemode leave Leave safe mode.
hadoop dfsadmin -safemode get Get the status of safe mode.
hadoop dfsadmin -safemode wait Wait until HDFS exits safe mode (i.e. enough data blocks have been reported).
#-# Configuration files
File Description
hadoop-env.sh Sets ENV variables for Hadoop.
core-site.xml Parameters for entire Hadoop cluster.
hdfs-site.xml Parameters for HDFS and its clients.
mapred-site.xml Parameters for MapReduce and its clients.
masters Host machines for secondary Namenode.
slaves List of slave hosts.
#-# Map-Reduce admin
Command Description
hadoop mradmin -safemode get Check Job tracker status.
hadoop mradmin -refreshQueues Reload mapreduce configuration.
hadoop mradmin -refreshNodes Reload active TaskTrackers.
hadoop mradmin -refreshServiceAcl Force Jobtracker to reload service ACL.
hadoop mradmin -refreshUserToGroupsMappings Force jobtracker to reload user group mappings.
#-# Balancer
Command Description
start-balancer.sh Balance the cluster.
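hadoop dfsadmin -setBalancerBandwidth <bandwidth> Set the maximum network bandwidth (in bytes per second) each datanode may use for balancing.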
hadoop balancer -threshold 20 Balance until each datanode's disk utilization is within 20% of the cluster average utilization.
#-# Filesystem
Command Description
hadoop fs -mkdir <path> Create a directory in HDFS.
hadoop fs -ls List files and directories in HDFS.
hadoop fs -cat <file> Print the contents of a file to stdout.
hadoop fs -du Check disk space usage in HDFS.
hadoop fs -expunge Empty trash on HDFS.
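hadoop fs -chgrp [-R] <group> <path> Change the group of files or directories.
hadoop fs -chown [-R] <owner>[:<group>] <path> Change the owner of files or directories.
hadoop fs -rm <path> Delete files.
hadoop fs -touchz <path> Create a file of zero length.
hadoop fs -stat <path> Display statistics about a file or directory.
hadoop fs -test -e <path> Check whether the path exists (returns 0 if true).
hadoop fs -test -z <path> Check whether the file has zero length (returns 0 if true).
hadoop fs -test -d <path> Check whether the path is a directory (returns 0 if true).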
#-# Misc
Command Description
hadoop fs -copyFromLocal