Hadoop streaming用のスクリプトファイルを各スレーブへ配布する時などに、lsyncdとrsyncでディレクトリを自動同期するための設定メモです。

lsyncd

lsyncdインストール

sudo aptitude -y install lsyncd

lsyncd設定ファイル

sudo vi /etc/lsyncd/lsyncd.conf.lua
----
-- lsyncd configuration: mirror /home/mapred/ to every slave with rsync.
--
settings = {
    statusFile     = "/var/run/lsyncd.stat",
    statusInterval = 30,
}

-- One sync per slave; all of them share the same source directory,
-- rsync options and delay, and differ only in the target host.
local slaves = { "slaves000", "slaves001", "slaves002" }

for _, slave in ipairs(slaves) do
    sync {
        default.rsync,
        source   = "/home/mapred/",
        target   = slave .. ":/home/mapred/",
        rsyncOps = { "-aruz", "--delete" },
        delay    = 10,
    }
end

lsyncdデーモン起動

sudo /etc/init.d/lsyncd start

rsync

rsyncd設定ファイル

sudo vi /etc/rsyncd.conf
# GLOBAL OPTIONS

# pid file = /var/run/rsync.pid
# log file = /var/log/rsync.log

timeout = 600
# Parameters must be written as "name = value"; the "=" is required.
hosts allow = *.sheeps.me
read only = yes

max connections = 2
dont compress = *.gz *.tgz *.zip *.z *.rpm *.deb *.iso *.bz2 *.tbz

# Module that exports the Hadoop streaming script directory.
[MapReduce]
comment = PHP for Hadoop streaming
path = /home/mapred
uid = mapred
gid = mapred

rsyncdデフォルトファイル

sudo vi /etc/default/rsync
# start rsync in daemon mode from init.d script?
#  only allowed values are "true", "false", and "inetd"
#  Use "inetd" if you want to start the rsyncd from inetd,
#  all this does is prevent the init.d script from printing a message
#  about not starting rsyncd (you still need to modify inetd's config yourself).
# Set to true here so that "/etc/init.d/rsync start" launches the daemon.
RSYNC_ENABLE=true

rsyncデーモン起動

sudo /etc/init.d/rsync start

SSHを利用したlsyncd設定

sudo vi /etc/lsyncd/lsyncd.conf.lua
----
-- lsyncd configuration: mirror /home/mapred/ to every slave with rsync over SSH.
--
settings = {
    statusFile = "/var/run/lsyncd.stat",
    statusInterval = 30,
}

-- Every slave gets the same sync definition (default.rsyncssh); only the host
-- differs. Generating the entries in a loop keeps them from drifting apart.
for _, slave in ipairs({ "slaves000", "slaves001", "slaves002" }) do
    sync {
        default.rsyncssh,
        source = "/home/mapred/",
        host = "hdfs@" .. slave,
        targetdir = "/home/mapred/",
        rsyncOps = {"-aruz", "--delete"},
        delay = 10,
    }
end

rootのSSH設定

sudo vi /root/.ssh/config
# One entry per slave: map the short host name to its FQDN and log in as
# user hdfs with the key stored at /root/.ssh/id_rsa.
Host slaves000
    HostName            slaves000.sheeps.me
    IdentityFile        /root/.ssh/id_rsa
    User                hdfs

Host slaves001
    HostName            slaves001.sheeps.me
    IdentityFile        /root/.ssh/id_rsa
    User                hdfs

Host slaves002
    HostName            slaves002.sheeps.me
    IdentityFile        /root/.ssh/id_rsa
    User                hdfs
# Shell commands (run after saving the SSH config): give root its own copy of
# the private key kept under $HADOOP_HOME/.ssh. The ${VAR:?} form aborts with a
# clear message if HADOOP_HOME is unset instead of trying to copy /.ssh/id_rsa.
sudo cp -- "${HADOOP_HOME:?HADOOP_HOME is not set}/.ssh/id_rsa" /root/.ssh/id_rsa
# ssh refuses private keys that are readable by other users.
sudo chmod 0600 /root/.ssh/id_rsa

Lsyncdからrsyncを実行するユーザーを変更する方法がわからないので
デフォルトのrootで実行させています。

接続先が/root/.ssh/known_hostsに登録されていないとエラーとなりました。
予め接続して登録しておく必要があるようです。

Leave a reply