1. 下载spark与scala
- Spark下载地址
https://archive.apache.org/dist/spark/spark-2.4.5/spark-2.4.5-bin-hadoop2.7.tgz
（注：历史版本已从各镜像站移除，需从Apache归档站下载）
- Scala下载地址
https://www.scala-lang.org/files/archive/scala-2.11.12.tgz
（注：Spark 2.4.x 基于 Scala 2.11 构建，不兼容 Scala 2.10.x）
2. 解压安装
$ tar xzf ~/assembly/spark-2.4.5-bin-hadoop2.7.tgz -C /apps/svr/spark/
$ tar xzf ~/assembly/scala-2.11.12.tgz -C /apps/svr/spark/
3. 修改配置文件
$ cd /apps/svr/spark/spark-2.4.5-bin-hadoop2.7/conf/
$ cp spark-env.sh.template spark-env.sh
$ cp log4j.properties.template log4j.properties
$ vim spark-env.sh
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# JDK used by the Spark daemons and shells
export JAVA_HOME=/apps/svr/java/jdk1.8.0_121
# Spark 2.4.x is built against Scala 2.11 — a 2.10.x install is incompatible
export SCALA_HOME=/apps/svr/spark/scala-2.11.12
export HADOOP_HOME=/apps/svr/hadoop/hadoop-2.7.3
# Lets Spark pick up the HDFS/YARN client configuration
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
# Hostname the standalone master binds to
export SPARK_MASTER_HOST=master86
# Total memory each worker may hand out to executors
export SPARK_WORKER_MEMORY=4G
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
4. 配置环境变量
$ vim ~/.bash_profile
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# SCALA_HOME — must be a Scala 2.11.x install to match Spark 2.4.x
export SCALA_HOME=/apps/svr/spark/scala-2.11.12
export PATH=$PATH:$SCALA_HOME/bin
# SPARK_HOME — sbin is added too so the standalone cluster
# start/stop scripts (start-all.sh etc.) are on the PATH
export SPARK_HOME=/apps/svr/spark/spark-2.4.5-bin-hadoop2.7
export PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
$ source ~/.bash_profile
5. 启动spark
启动standalone集群（master与worker，由spark-env.sh中的SPARK_MASTER_HOST等配置生效）：
$ $SPARK_HOME/sbin/start-all.sh
验证安装（pyspark仅打开交互式PySpark shell，并不会启动集群）：
$ pyspark