From 3702608eb9568bce36df2c645c9cd6ba9fc9bc2f Mon Sep 17 00:00:00 2001
From: tennix
Date: Mon, 16 Jul 2018 22:30:33 +0800
Subject: [PATCH 1/4] update tispark to 1.0

---
 README.md          |  9 +++++++++
 tispark/Dockerfile | 23 +++++++++++++++++++----
 2 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 22cf159..2a987e5 100644
--- a/README.md
+++ b/README.md
@@ -180,3 +180,12 @@ scala> spark.sql("select count(*) from lineitem").show
 |   60175|
 +--------+
 ```
+
+You can also access Spark with Python or R using following commands:
+
+```
+docker-compose exec tispark-master /opt/spark/bin/pyspark
+docker-compose exec tispark-master /opt/spark/bin/sparkR
+```
+
+More document about TiSpark can be found [here](https://github.com/pingcap/tispark).
diff --git a/tispark/Dockerfile b/tispark/Dockerfile
index 750e377..62c6758 100644
--- a/tispark/Dockerfile
+++ b/tispark/Dockerfile
@@ -2,25 +2,40 @@ FROM anapsix/alpine-java:8
 ENV SPARK_VERSION=2.1.1 \
     HADOOP_VERSION=2.7 \
-    TISPARK_VERSION=0.1.0-SNAPSHOT \
+    TISPARK_VERSION=1.0 \
+    TISPARK_R_VERSION=1.1 \
+    TISPARK_PYTHON_VERSION=1.0.1 \
     SPARK_HOME=/opt/spark \
     SPARK_NO_DAEMONIZE=true \
     SPARK_MASTER_PORT=7077 \
     SPARK_MASTER_HOST=0.0.0.0 \
     SPARK_MASTER_WEBUI_PORT=8080

+ADD R /TiSparkR
+
 # base image only contains busybox version nohup and ps
 # spark scripts needs nohup in coreutils and ps in procps
 # and we can use mysql-client to test tidb connection
-RUN apk --no-cache add coreutils procps mysql-client python py-pip R \
-    && pip install pytispark==1.0.1 pyspark==2.1.2
+RUN apk --no-cache add \
+    coreutils \
+    mysql-client \
+    procps \
+    python \
+    py-pip \
+    R \
+    && pip install --no-cache-dir pytispark==${TISPARK_PYTHON_VERSION} \
+    && R CMD build TiSparkR \
+    && R CMD INSTALL TiSparkR_${TISPARK_R_VERSION}.tar.gz \
+    && rm -rf /TiSparkR_${TISPARK_R_VERSION}.tar.gz /TiSparkR

 RUN wget -q https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \
     && tar zxf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz -C /opt/ \
     && ln -s /opt/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} ${SPARK_HOME} \
-    && wget -q http://download.pingcap.org/tispark-${TISPARK_VERSION}-jar-with-dependencies.jar -P ${SPARK_HOME}/jars \
+    && wget -q https://github.com/pingcap/tispark/releases/download/${TISPARK_VERSION}/tispark-core-${TISPARK_VERSION}-jar-with-dependencies.jar -P ${SPARK_HOME}/jars \
     && wget -q http://download.pingcap.org/tispark-sample-data.tar.gz \
     && tar zxf tispark-sample-data.tar.gz -C ${SPARK_HOME}/data/ \
     && rm -rf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz tispark-sample-data.tar.gz

+ENV PYTHONPATH=${SPARK_HOME}/python/lib/py4j-0.10.4-src.zip:${SPARK_HOME}/python:$PYTHONPATH
+
 WORKDIR ${SPARK_HOME}
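The patch above wires Python support into the image (pytispark from pip, pyspark on `PYTHONPATH`), but the repository README still only demonstrates a Scala session. A minimal pyspark equivalent looks roughly like the sketch below; it assumes the pytispark 1.0.1 package exposes a `TiContext` wrapper under `pytispark.pytispark`, as its upstream README describes, and that the bundled tispark-sample-data has been loaded into TiDB as the `tpch_test` database.

```python
# Run inside the shell started by:
#   docker-compose exec tispark-master /opt/spark/bin/pyspark
# where `spark` is the SparkSession the shell provides.
import pytispark.pytispark as pti  # assumed import path for the pip-installed pytispark

ti = pti.TiContext(spark)          # wrap the session, mirroring the Scala/R TiContext
ti.tidbMapDatabase("tpch_test")    # map the sample database
spark.sql("select count(*) from lineitem").show()
```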
From 4bb5b406bf3b906d77423d240294289f167f416f Mon Sep 17 00:00:00 2001
From: tennix
Date: Mon, 16 Jul 2018 22:32:50 +0800
Subject: [PATCH 2/4] add TiSparkR

---
 tispark/R/DESCRIPTION  | 11 +++++++++++
 tispark/R/NAMESPACE    |  1 +
 tispark/R/R/tisparkR.R | 41 +++++++++++++++++++++++++++++++++++++++++
 tispark/R/README.md    | 42 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 95 insertions(+)
 create mode 100644 tispark/R/DESCRIPTION
 create mode 100644 tispark/R/NAMESPACE
 create mode 100644 tispark/R/R/tisparkR.R
 create mode 100644 tispark/R/README.md

diff --git a/tispark/R/DESCRIPTION b/tispark/R/DESCRIPTION
new file mode 100644
index 0000000..fbfbe62
--- /dev/null
+++ b/tispark/R/DESCRIPTION
@@ -0,0 +1,11 @@
+Package: TiSparkR
+Type: Package
+Title: TiSpark for R
+Version: 1.1
+Author: PingCAP
+Maintainer: Novemser
+Description: A shabby thin layer to support TiSpark in R language.
+License: Apache 2.0
+Copyright: 2017 PingCAP, Inc.
+Encoding: UTF-8
+LazyData: true
diff --git a/tispark/R/NAMESPACE b/tispark/R/NAMESPACE
new file mode 100644
index 0000000..d75f824
--- /dev/null
+++ b/tispark/R/NAMESPACE
@@ -0,0 +1 @@
+exportPattern("^[[:alpha:]]+")
diff --git a/tispark/R/R/tisparkR.R b/tispark/R/R/tisparkR.R
new file mode 100644
index 0000000..ac7a272
--- /dev/null
+++ b/tispark/R/R/tisparkR.R
@@ -0,0 +1,41 @@
+#
+# Copyright 2017 PingCAP, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#
+
+# Title : TiSparkR
+# Objective : TiSpark entry for R
+# Created by: novemser
+# Created on: 17-11-1
+
+# Function:createTiContext
+# Create a new TiContext via the spark session passed in
+#
+# @return A new TiContext created on session
+# @param session A Spark Session for TiContext creation
+createTiContext <- function(session) {
+  sparkR.newJObject("org.apache.spark.sql.TiContext", session)
+}
+
+# Function:tidbMapDatabase
+# Mapping TiContext designated database to `dbName`.
+#
+# @param tiContext TiSpark context
+# @param dbName Database name to map
+# @param isPrefix Whether to use dbName As Prefix
+# @param loadStatistics Whether to use statistics information from TiDB
+tidbMapDatabase <- function(tiContext, dbName, isPrefix=FALSE, loadStatistics=TRUE) {
+  sparkR.callJMethod(tiContext, "tidbMapDatabase", dbName, isPrefix, loadStatistics)
+  paste("Mapping to database:", dbName)
+}
diff --git a/tispark/R/README.md b/tispark/R/README.md
new file mode 100644
index 0000000..bfffdee
--- /dev/null
+++ b/tispark/R/README.md
@@ -0,0 +1,42 @@
+## TiSparkR
+A thin layer build for supporting R language with TiSpark
+
+### Usage
+1. Download TiSparkR source code and build a binary package(run `R CMD build R` in TiSpark root directory). Install it to your local R library(e.g. via `R CMD INSTALL TiSparkR_1.0.0.tar.gz`)
+2. Build or download TiSpark dependency jar `tispark-core-1.0-RC1-jar-with-dependencies.jar` [here](https://github.com/pingcap/tispark).
+3. `cd` to your Spark home directory, and run
+```
+./bin/sparkR --jars /where-ever-it-is/tispark-core-${version}-jar-with-dependencies.jar
+```
+Note that you should replace the `TiSpark` jar path with your own.
+
+4. Use as below in your R console:
+```R
+# import tisparkR library
+> library(TiSparkR)
+# create a TiContext instance
+> ti <- createTiContext(spark)
+# Map TiContext to database:tpch_test
+> tidbMapDatabase(ti, "tpch_test")
+
+# Run a sql query
+> customers <- sql("select * from customer")
+# Print schema
+> printSchema(customers)
+root
+ |-- c_custkey: long (nullable = true)
+ |-- c_name: string (nullable = true)
+ |-- c_address: string (nullable = true)
+ |-- c_nationkey: long (nullable = true)
+ |-- c_phone: string (nullable = true)
+ |-- c_acctbal: decimal(15,2) (nullable = true)
+ |-- c_mktsegment: string (nullable = true)
+ |-- c_comment: string (nullable = true)
+
+# Run a count query
+> count <- sql("select count(*) from customer")
+# Print count result
+> head(count)
+  count(1)
+1      150
+```
\ No newline at end of file
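The two R functions in `tisparkR.R` are thin wrappers over SparkR's JVM bridge (`sparkR.newJObject` and `sparkR.callJMethod`). The same two calls can be reproduced from pyspark through its py4j gateway; the sketch below mirrors them, taking the `TiContext` constructor and the three-argument `tidbMapDatabase` signature from the R file above. The underscore-prefixed pyspark attributes are private implementation details and may change between Spark versions.

```python
# py4j equivalent of createTiContext()/tidbMapDatabase() from tisparkR.R.
# Run inside pyspark on the tispark image, where the TiSpark jar already sits
# in ${SPARK_HOME}/jars and `spark` is the provided SparkSession.
jvm = spark.sparkContext._jvm
ti = jvm.org.apache.spark.sql.TiContext(spark._jsparkSession)  # createTiContext(spark)
ti.tidbMapDatabase("tpch_test", False, True)                   # dbName, isPrefix, loadStatistics
spark.sql("select count(*) from customer").show()
```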
From cf8eeb48385840d6856702a8d5786b64d3e16378 Mon Sep 17 00:00:00 2001
From: tennix
Date: Tue, 17 Jul 2018 16:19:10 +0800
Subject: [PATCH 3/4] address comment

---
 README.md           |  4 +--
 tispark/R/README.md | 74 +++++++++++++++++++++++-----------------------
 2 files changed, 40 insertions(+), 38 deletions(-)

diff --git a/README.md b/README.md
index 2a987e5..f557dbe 100644
--- a/README.md
+++ b/README.md
@@ -181,11 +181,11 @@ scala> spark.sql("select count(*) from lineitem").show
 +--------+
 ```

-You can also access Spark with Python or R using following commands:
+You can also access Spark with Python or R using the following commands:

 ```
 docker-compose exec tispark-master /opt/spark/bin/pyspark
 docker-compose exec tispark-master /opt/spark/bin/sparkR
 ```

-More document about TiSpark can be found [here](https://github.com/pingcap/tispark).
+More documents about TiSpark can be found [here](https://github.com/pingcap/tispark).
diff --git a/tispark/R/README.md b/tispark/R/README.md
index bfffdee..fab5d3f 100644
--- a/tispark/R/README.md
+++ b/tispark/R/README.md
@@ -1,42 +1,44 @@
 ## TiSparkR
-A thin layer build for supporting R language with TiSpark
+TiSparkR is a thin layer built to support the R language with TiSpark.

 ### Usage
-1. Download TiSparkR source code and build a binary package(run `R CMD build R` in TiSpark root directory). Install it to your local R library(e.g. via `R CMD INSTALL TiSparkR_1.0.0.tar.gz`)
+1. Download the TiSparkR source code and build a binary package (run `R CMD build R` in TiSpark root directory). Install it to your local R library (e.g. via `R CMD INSTALL TiSparkR_1.0.0.tar.gz`)
+
 2. Build or download TiSpark dependency jar `tispark-core-1.0-RC1-jar-with-dependencies.jar` [here](https://github.com/pingcap/tispark).
-3. `cd` to your Spark home directory, and run
-```
-./bin/sparkR --jars /where-ever-it-is/tispark-core-${version}-jar-with-dependencies.jar
-```
-Note that you should replace the `TiSpark` jar path with your own.
+
+3. `cd` to your Spark home directory, and run:
+   ```
+   ./bin/sparkR --jars /where-ever-it-is/tispark-core-${version}-jar-with-dependencies.jar
+   ```
+   Note that you should replace the `TiSpark` jar path with your own.

 4. Use as below in your R console:
-```R
-# import tisparkR library
-> library(TiSparkR)
-# create a TiContext instance
-> ti <- createTiContext(spark)
-# Map TiContext to database:tpch_test
-> tidbMapDatabase(ti, "tpch_test")
-
-# Run a sql query
-> customers <- sql("select * from customer")
-# Print schema
-> printSchema(customers)
-root
- |-- c_custkey: long (nullable = true)
- |-- c_name: string (nullable = true)
- |-- c_address: string (nullable = true)
- |-- c_nationkey: long (nullable = true)
- |-- c_phone: string (nullable = true)
- |-- c_acctbal: decimal(15,2) (nullable = true)
- |-- c_mktsegment: string (nullable = true)
- |-- c_comment: string (nullable = true)
-
-# Run a count query
-> count <- sql("select count(*) from customer")
-# Print count result
-> head(count)
-  count(1)
-1      150
-```
\ No newline at end of file
+   ```R
+   # import tisparkR library
+   > library(TiSparkR)
+   # create a TiContext instance
+   > ti <- createTiContext(spark)
+   # Map TiContext to database:tpch_test
+   > tidbMapDatabase(ti, "tpch_test")
+
+   # Run a sql query
+   > customers <- sql("select * from customer")
+   # Print schema
+   > printSchema(customers)
+   root
+    |-- c_custkey: long (nullable = true)
+    |-- c_name: string (nullable = true)
+    |-- c_address: string (nullable = true)
+    |-- c_nationkey: long (nullable = true)
+    |-- c_phone: string (nullable = true)
+    |-- c_acctbal: decimal(15,2) (nullable = true)
+    |-- c_mktsegment: string (nullable = true)
+    |-- c_comment: string (nullable = true)
+
+   # Run a count query
+   > count <- sql("select count(*) from customer")
+   # Print count result
+   > head(count)
+     count(1)
+   1      150
+   ```
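The README steps above target the sparkR shell inside the prebuilt image, where the TiSpark jar is already on the classpath. A Python session built outside the image has to attach the jar explicitly; the sketch below shows one way to do that. The application name, master URL, and jar path are placeholders, and whatever TiSpark-specific settings your deployment needs (for example the placement driver address) still have to be supplied through your Spark configuration.

```python
# Sketch: start a SparkSession with the TiSpark jar attached, the Python analogue of
# `./bin/sparkR --jars /where-ever-it-is/tispark-core-${version}-jar-with-dependencies.jar`.
# The master URL and jar path below are placeholders, not values taken from the patches.
from pyspark.sql import SparkSession

spark = (
    SparkSession.builder
    .appName("tispark-sketch")
    .master("spark://tispark-master:7077")
    .config("spark.jars", "/where-ever-it-is/tispark-core-1.0-RC1-jar-with-dependencies.jar")
    .getOrCreate()
)
```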
From 8313e38d8483e7bf1ff737a2eb6737ea98c40aa5 Mon Sep 17 00:00:00 2001
From: tennix
Date: Mon, 23 Jul 2018 17:31:46 +0800
Subject: [PATCH 4/4] upgrade tispark version to 1.0.1

---
 tispark/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tispark/Dockerfile b/tispark/Dockerfile
index 62c6758..8271564 100644
--- a/tispark/Dockerfile
+++ b/tispark/Dockerfile
@@ -2,7 +2,7 @@ FROM anapsix/alpine-java:8
 ENV SPARK_VERSION=2.1.1 \
     HADOOP_VERSION=2.7 \
-    TISPARK_VERSION=1.0 \
+    TISPARK_VERSION=1.0.1 \
     TISPARK_R_VERSION=1.1 \
     TISPARK_PYTHON_VERSION=1.0.1 \
     SPARK_HOME=/opt/spark \