# Build-time defaults shared by all stages.
# NOTE: ARGs declared before the first FROM are only visible to FROM lines;
# each stage redeclares the ones it uses (Dockerfile ARG scoping rules).
ARG SPARK_VERSION=2.4.6
ARG SPARK_HOME=/opt/spark
ARG JDK_VERSION=8u192
# Placeholder — must be overridden with a real JDK download URL via --build-arg.
ARG JDK_URL=your_jdk_url
ARG BIGDL_VERSION=0.13.0
ARG ANALYTICS_ZOO_VERSION=0.12.0-SNAPSHOT
ARG TINI_VERSION=v0.18.0
| 8 | + |
# stage.1 jdk & spark — build-only stage; later stages copy /opt/jdk, /opt/spark,
# /opt/entrypoint.sh and /sbin/tini out of it.
FROM ubuntu:18.04 as spark
ARG SPARK_VERSION
ARG JDK_VERSION
ARG JDK_URL
ARG SPARK_HOME
# Was hardcoded to v0.18.0 here, duplicating the global ARG TINI_VERSION;
# take it from the build arg so a single override applies everywhere.
ARG TINI_VERSION
ENV TINI_VERSION=${TINI_VERSION}
ENV SPARK_VERSION=${SPARK_VERSION}
ENV SPARK_HOME=${SPARK_HOME}
RUN apt-get update --fix-missing && \
    apt-get install -y apt-utils vim curl nano wget unzip maven git && \
    # clean apt lists in the same layer that created them so they don't bloat the image
    rm -rf /var/lib/apt/lists/* && \
# java
    wget "$JDK_URL" && \
    gunzip jdk-$JDK_VERSION-linux-x64.tar.gz && \
    tar -xf jdk-$JDK_VERSION-linux-x64.tar -C /opt && \
    rm jdk-$JDK_VERSION-linux-x64.tar && \
    mv /opt/jdk* /opt/jdk$JDK_VERSION && \
    ln -s /opt/jdk$JDK_VERSION /opt/jdk && \
# spark
    wget https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop2.7.tgz && \
    tar -zxvf spark-${SPARK_VERSION}-bin-hadoop2.7.tgz && \
    mv spark-${SPARK_VERSION}-bin-hadoop2.7 /opt/spark && \
    rm spark-${SPARK_VERSION}-bin-hadoop2.7.tgz && \
    # keep a copy of Spark's k8s entrypoint at /opt for the final image's ENTRYPOINT
    cp /opt/spark/kubernetes/dockerfiles/spark/entrypoint.sh /opt

# The version check below uses bash semantics; point /bin/sh at bash first.
RUN ln -fs /bin/bash /bin/sh
# Swap version-specific jars — presumably for kubernetes-client compatibility
# with newer cluster API versions; TODO confirm against release notes.
RUN if [ $SPARK_VERSION = "3.1.2" ]; then \
      rm $SPARK_HOME/jars/okhttp-*.jar && \
      wget -P $SPARK_HOME/jars https://repo1.maven.org/maven2/com/squareup/okhttp3/okhttp/3.8.0/okhttp-3.8.0.jar; \
    elif [ $SPARK_VERSION = "2.4.6" ]; then \
      rm $SPARK_HOME/jars/kubernetes-client-*.jar && \
      wget -P $SPARK_HOME/jars https://repo1.maven.org/maven2/io/fabric8/kubernetes-client/4.4.2/kubernetes-client-4.4.2.jar; \
    fi

# tini (init process for signal forwarding / zombie reaping); chmod +x happens in stage.4
ADD https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini /sbin/tini
| 44 | + |
# stage.2 analytics-zoo — download the Analytics Zoo distribution and unpack
# its bundled python examples; stage.3 copies the results out of /opt.
FROM ubuntu:18.04 as analytics-zoo
ARG SPARK_VERSION
ARG BIGDL_VERSION
ARG ANALYTICS_ZOO_VERSION

ENV SPARK_VERSION=${SPARK_VERSION}
ENV BIGDL_VERSION=${BIGDL_VERSION}
ENV ANALYTICS_ZOO_VERSION=${ANALYTICS_ZOO_VERSION}
ENV ANALYTICS_ZOO_HOME=/opt/analytics-zoo-${ANALYTICS_ZOO_VERSION}

RUN apt-get update --fix-missing && \
    apt-get install -y apt-utils vim curl nano wget unzip maven git && \
    rm -rf /var/lib/apt/lists/*
# COPY, not ADD, for plain local files (hadolint DL3020).
# The script presumably reads the *_VERSION env vars set above — verify against its source.
COPY ./download-analytics-zoo.sh /opt

RUN chmod a+x /opt/download-analytics-zoo.sh && \
    mkdir -p /opt/analytics-zoo-examples/python
RUN /opt/download-analytics-zoo.sh && \
    rm analytics-zoo-bigdl*.zip && \
    # extract only the examples tree from the python-api zip, then flatten it
    unzip $ANALYTICS_ZOO_HOME/lib/*.zip 'zoo/examples/*' -d /opt/analytics-zoo-examples/python && \
    mv /opt/analytics-zoo-examples/python/zoo/examples/* /opt/analytics-zoo-examples/python && \
    rm -rf /opt/analytics-zoo-examples/python/zoo/examples
| 67 | + |
# stage.3 copies layer — aggregate all artifacts under /opt so the final
# stage can pull everything in with a single COPY (one layer, better cache reuse).
FROM ubuntu:18.04 as copies-layer
ARG ANALYTICS_ZOO_VERSION

COPY --from=analytics-zoo /opt/analytics-zoo-${ANALYTICS_ZOO_VERSION} /opt/analytics-zoo-${ANALYTICS_ZOO_VERSION}
COPY --from=analytics-zoo /opt/analytics-zoo-examples/python /opt/analytics-zoo-examples/python
COPY --from=spark /opt/jdk /opt/jdk
COPY --from=spark /opt/spark /opt/spark
# entrypoint.sh lands at /opt/entrypoint.sh — referenced by the final stage's ENTRYPOINT
COPY --from=spark /opt/spark/kubernetes/dockerfiles/spark/entrypoint.sh /opt
| 77 | + |
# stage.4 — final runtime image: JDK + Spark + Analytics Zoo + Python ML stack.
FROM ubuntu:18.04
# MAINTAINER is deprecated (hadolint DL4000); use a LABEL instead.
LABEL maintainer="The Analytics-Zoo Authors https://github.com/intel-analytics/analytics-zoo"
ARG ANALYTICS_ZOO_VERSION
ARG BIGDL_VERSION
ARG SPARK_VERSION
ARG SPARK_HOME
ARG TINI_VERSION

ENV ANALYTICS_ZOO_VERSION=${ANALYTICS_ZOO_VERSION}
# Single definition from the build arg. The original re-set SPARK_HOME to a
# hardcoded /opt/spark further down, silently overriding any custom --build-arg value.
ENV SPARK_HOME=${SPARK_HOME}
ENV SPARK_VERSION=${SPARK_VERSION}
ENV ANALYTICS_ZOO_HOME=/opt/analytics-zoo-${ANALYTICS_ZOO_VERSION}
# NOTE(review): FLINK_VERSION is declared nowhere in this Dockerfile, so this
# expands to "/opt/flink-". Kept for compatibility — confirm whether it can be dropped.
ENV FLINK_HOME=/opt/flink-${FLINK_VERSION}
ENV OMP_NUM_THREADS=4
ENV NOTEBOOK_PORT=12345
# Default token for the Jupyter notebook — override at `docker run` time; do not
# rely on this value in any shared deployment.
ENV NOTEBOOK_TOKEN=1234qwer
ENV RUNTIME_SPARK_MASTER=local[4]
ENV RUNTIME_K8S_SERVICE_ACCOUNT=spark
ENV RUNTIME_K8S_SPARK_IMAGE=intelanalytics/hyper-zoo:${ANALYTICS_ZOO_VERSION}-${SPARK_VERSION}
ENV RUNTIME_DRIVER_HOST=localhost
ENV RUNTIME_DRIVER_PORT=54321
ENV RUNTIME_EXECUTOR_CORES=4
ENV RUNTIME_EXECUTOR_MEMORY=20g
ENV RUNTIME_EXECUTOR_INSTANCES=1
ENV RUNTIME_TOTAL_EXECUTOR_CORES=4
ENV RUNTIME_DRIVER_CORES=4
ENV RUNTIME_DRIVER_MEMORY=10g
ENV RUNTIME_PERSISTENT_VOLUME_CLAIM=myvolumeclaim
ENV HADOOP_CONF_DIR=/opt/hadoop-conf
ENV BIGDL_VERSION=${BIGDL_VERSION}
ENV BIGDL_CLASSPATH=${ANALYTICS_ZOO_HOME}/lib/analytics-zoo-bigdl_${BIGDL_VERSION}-spark_${SPARK_VERSION}-${ANALYTICS_ZOO_VERSION}-jar-with-dependencies.jar
ENV JAVA_HOME=/opt/jdk
ENV REDIS_HOME=/opt/redis-5.0.5
ENV CS_HOME=/opt/work/cluster-serving
ENV PYTHONPATH=${ANALYTICS_ZOO_HOME}/lib/analytics-zoo-bigdl_${BIGDL_VERSION}-spark_${SPARK_VERSION}-${ANALYTICS_ZOO_VERSION}-python-api.zip:${SPARK_HOME}/python/lib/pyspark.zip:${SPARK_HOME}/python/lib/py4j-*.zip:${CS_HOME}/serving-python.zip:/opt/models/research/slim
ENV PATH=${ANALYTICS_ZOO_HOME}/bin/cluster-serving:${JAVA_HOME}/bin:/root/miniconda3/bin:${PATH}
ENV TINI_VERSION=${TINI_VERSION}
ENV LC_ALL=C.UTF-8
ENV LANG=C.UTF-8


# All build artifacts (jdk, spark, analytics-zoo, examples, entrypoint.sh) in one layer.
COPY --from=copies-layer /opt /opt
COPY --from=spark /sbin/tini /sbin/tini
# COPY, not ADD, for plain local files (hadolint DL3020).
COPY ./start-notebook-spark.sh /opt
COPY ./start-notebook-k8s.sh /opt

# NOTE(review): no USER directive — the container runs as root; presumably the
# Spark-on-k8s entrypoint handles user switching, but confirm before shipping.
RUN mkdir -p /opt/analytics-zoo-examples/python && \
    mkdir -p /opt/analytics-zoo-examples/scala && \
    apt-get update --fix-missing && \
    apt-get install -y apt-utils vim curl nano wget unzip maven git && \
    apt-get install -y gcc g++ make && \
    apt-get install -y libsm6 libxext6 libxrender-dev && \
    rm /bin/sh && \
    ln -sv /bin/bash /bin/sh && \
    echo "auth required pam_wheel.so use_uid" >> /etc/pam.d/su && \
    chgrp root /etc/passwd && chmod ug+rw /etc/passwd && \
# python
    apt-get install -y python3-minimal && \
    apt-get install -y build-essential python3 python3-setuptools python3-dev python3-pip && \
    # last apt-get install above — clean the lists in the same layer
    rm -rf /var/lib/apt/lists/* && \
    pip3 install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir --upgrade setuptools && \
    pip install --no-cache-dir numpy==1.18.1 scipy && \
    pip install --no-cache-dir pandas==1.0.3 && \
    pip install --no-cache-dir scikit-learn matplotlib seaborn jupyter jupyterlab requests h5py && \
    ln -s /usr/bin/python3 /usr/bin/python && \
    #Fix tornado await process
    pip uninstall -y -q tornado && \
    pip install --no-cache-dir tornado && \
    python3 -m ipykernel.kernelspec && \
    pip install --no-cache-dir tensorboard && \
    pip install --no-cache-dir jep && \
    pip install --no-cache-dir cloudpickle && \
    # opencv-python was installed unpinned here AND pinned to 4.2.0.34 below;
    # the pinned install below is the one that determines the final version,
    # so the redundant unpinned install is dropped.
    pip install --no-cache-dir pyyaml && \
    pip install --no-cache-dir redis && \
    pip install --no-cache-dir ray[tune]==1.2.0 && \
    pip install --no-cache-dir Pillow==6.1 && \
    pip install --no-cache-dir psutil aiohttp && \
    pip install --no-cache-dir py4j && \
    pip install --no-cache-dir cmake==3.16.3 && \
    pip install --no-cache-dir torch==1.7.1 torchvision==0.8.2 && \
    pip install --no-cache-dir horovod==0.19.2 && \
#tf2
    pip install --no-cache-dir pyarrow && \
    pip install --no-cache-dir opencv-python==4.2.0.34 && \
    pip install --no-cache-dir aioredis==1.1.0 && \
    pip install --no-cache-dir tensorflow==2.4.0 && \
# chmod
    chmod a+x /opt/start-notebook-spark.sh && \
    chmod a+x /opt/start-notebook-k8s.sh && \
    chmod +x /sbin/tini && \
    cp /sbin/tini /usr/bin/tini

WORKDIR /opt/spark/work-dir

# Spark's k8s entrypoint script, copied to /opt in stage.1 and brought in via copies-layer.
ENTRYPOINT [ "/opt/entrypoint.sh" ]