Updating Column Values in Clojure Map


(require '[huri.core :as h]
         '[clojure.core.matrix.dataset :as d]
         '[incanter.core :as i])

;; Sample dataset: a vector of row maps, each with an :id and a :name entry.
(def ds
  [{:id 1.0, :name "name1"}
   {:id 2.0, :name "name2"}
   {:id 3.0, :name "name3"}])

;; UPDATE THE :name COLUMN IN THE DATASET
;; - IF THE VALUE IS NOT "name2", THEN CHANGE IT TO "not 2"
;;
;; EXPECTED OUTPUT:
;; | :id | :name |
;; |-----+-------|
;; | 1.0 | not 2 |
;; | 2.0 | name2 |
;; | 3.0 | not 2 |

;; WITH CLOJURE.CORE/UPDATE
;; `update` applies the function to the value stored under a single key.
(def d1
  (let [relabel (fn [nm] (if (not= nm "name2") "not 2" nm))]
    (map (fn [row] (update row :name relabel)) ds)))

;; WITH CLOJURE.CORE/UPDATE-IN
;; `update-in` does the same through a key path, here the one-element path [:name].
(def d2
  (let [relabel (fn [nm] (if (not= nm "name2") "not 2" nm))]
    (map (fn [row] (update-in row [:name] relabel)) ds)))

;; WITH HURI/UPDATE-COLS
;; The function mapped to :name keeps "name2" and turns any other value into "not 2".
(def d3
  (->> ds
       (h/update-cols {:name (fn [nm] (if (not= nm "name2") "not 2" nm))})))

;; WITH MATRIX.DATASET/EMAP-COLUMN
;; Convert the row maps to a core.matrix dataset, rewrite the :name column
;; element by element, then turn the result back into a sequence of row maps.
;; One-argument `into` returns its argument unchanged, so `(map into rows)`
;; converted nothing; each row is now explicitly poured into a plain map.
(def d4 (->> (-> ds
                 (d/dataset)
                 (d/emap-column :name (fn [nm] (if (= "name2" nm) nm "not 2")))
                 (d/row-maps))
             (map (fn [row] (into {} row)))))
   
;; WITH INCANTER/TRANSFORM-COL
;; Convert the row maps to an Incanter dataset, rewrite the :name column, then
;; read the rows back out.
;; - The rows are fetched by field name (:rows) rather than by position with
;;   `(second (vals dataset))`, which depends on the order of the dataset's fields.
;; - One-argument `into` returns its argument unchanged, so `(map into rows)`
;;   converted nothing; each row is now explicitly poured into a plain map.
(def d5 (->> (-> ds
                 (i/to-dataset)
                 (i/transform-col :name (fn [nm] (if (= "name2" nm) nm "not 2")))
                 (:rows))
             (map (fn [row] (into {} row)))))

Adding New Columns to Clojure Map


(require '[huri.core :as h]
         '[clojure.core.matrix.dataset :as d]
         '[incanter.core :as i])

;; Sample dataset: a vector of row maps, each with an :id and a :name entry.
(def ds [{:id 1.0 :name "name1"}
         {:id 2.0 :name "name2"}
         {:id 3.0 :name "name3"}])

;; ADD 2 COLUMNS TO THE DATASET
;; - :add2  = :id + 2
;; - :name2 = "Y" WHEN :name IS "name2", OTHERWISE "N"
;;
;; EXPECTED OUTPUT:
;;| :id | :name | :add2 | :name2 |
;;|-----+-------+-------+--------|
;;| 1.0 | name1 |   3.0 |      N |
;;| 2.0 | name2 |   4.0 |      Y |
;;| 3.0 | name3 |   5.0 |      N |

;; WITH PLAIN CLOJURE
;; #1 - MERGE
(def d1 (map #(merge % {:add2 (+ (:id %) 2)
                        :name2 (if (= "name2" (:name %)) "Y" "N")}) ds))

;; #2 - MERGE-WITH
;; The combining function only runs when a key is present on both sides.
;; It keeps the new value, so an already-existing :add2 / :name2 is overwritten
;; exactly as in the other approaches. (Combining with `into` would throw on
;; such a collision because the values are scalars, not collections.)
(def d2 (map #(merge-with (fn [_ new-val] new-val)
                          %
                          {:add2 (+ (:id %) 2)
                           :name2 (if (= "name2" (:name %)) "Y" "N")}) ds))

;; #3 - ASSOC
(def d3 (map #(assoc % :add2 (+ (:id %) 2)
                       :name2 (if (= "name2" (:name %)) "Y" "N")) ds))

;; #4 - CONJ
;; conj-ing a map onto a map adds all of its entries.
(def d4 (map #(conj % {:add2 (+ (:id %) 2)
                       :name2 (if (= "name2" (:name %)) "Y" "N")}) ds))

;; #5 - CONCAT
;; Concatenate the entries of both maps and pour them into a fresh map.
(def d5 (map #(into {} (concat % {:add2 (+ (:id %) 2)
                                  :name2 (if (= "name2" (:name %)) "Y" "N")})) ds))

;; WITH HURI 
;; Each new column is given as [function source-column]:
;; :name2 is computed from :name and :add2 from :id.
(def d6
  (->> ds
       (h/derive-cols {:name2 [(fn [nm] (if (= "name2" nm) "Y" "N")) :name]
                       :add2  [(fn [id] (+ 2 id)) :id]})))

;; WITH CORE.MATRIX API
;; Compute both new columns from the original rows first, append them to the
;; core.matrix dataset one after the other, then convert back to row maps.
(def d7
  (let [add2-col  (map (fn [row] (+ 2 (:id row))) ds)
        name2-col (map (fn [row] (if (= "name2" (:name row)) "Y" "N")) ds)
        with-add2 (d/add-column (d/dataset ds) :add2 add2-col)
        with-both (d/add-column with-add2 :name2 name2-col)]
    (d/row-maps with-both)))

;; WITH INCANTER API
;; Build an Incanter dataset, derive :add2 from :id and :name2 from :name,
;; then read the row maps back out.
;; The rows are fetched by field name (:rows) rather than by position with
;; `(second (vals dataset))`, which depends on the order of the dataset's fields.
(def d8 (->> ds
             (i/to-dataset)
             (i/add-derived-column :add2 [:id] #(+ 2 %))
             (i/add-derived-column :name2 [:name] #(if (= "name2" %) "Y" "N"))
             (:rows)))

;; CHECK THE DATA EQUALITY
(= d1 d2 d3 d4 d5 d6 d7 d8)
;; true

Transpose in Clojure


(require '[huri.core :as h]
         '[clojure.core.matrix.dataset :as d]
         '[incanter.core :as i])

;; FROM A SEQUENCE OF ROW MAPS TO A MAP OF COLUMNS

;; Sample input: a vector of row maps that share the same keys.
(def byRow [{:x 1 :y "a"}
            {:x 2 :y "b"}
            {:x 3 :y "c"}])

;; APPROACH #1 - PLAIN CLOJURE
(defn rows->cols
  "Transposes a sequence of row maps into a map of column key -> sequence of
  that column's values, in row order.

  The column keys are taken from the first row and every value is looked up
  by key, so rows whose entries are stored in a different order are still
  transposed correctly. A key missing from a row yields nil for that row.
  Returns {} when `rows` is empty."
  [rows]
  (let [ks (keys (first rows))]
    ;; Look each column up by key instead of relying on `vals` returning the
    ;; values of every row in the same order as the first row's keys.
    (zipmap ks (map (fn [k] (map #(get % k) rows)) ks))))

(rows->cols byRow)

; {:x (1 2 3), :y ("a" "b" "c")}

;; APPROACH #2 - HURI LIBRARY
(h/col-oriented byRow)

; {:x (1 2 3), :y ("a" "b" "c")}

;; APPROACH #3 - CORE.MATRIX LIBRARY
(d/to-map (d/dataset (keys (first byRow)) byRow))

; {:x [1 2 3], :y ["a" "b" "c"]}

;; APPROACH #4 - INCANTER LIBRARY
(i/to-map (i/to-dataset byRow))

; {:x (1 2 3), :y ("a" "b" "c")}

;; FROM A MAP OF COLUMNS TO A SEQUENCE OF ROW MAPS

;; Sample input: a map from column key to the list of that column's values.
(def byCol {:x '(1 2 3)
            :y '("a" "b" "c")})

;; APPROACH #1 - PLAIN CLOJURE
(defn cols->rows
  "Transposes a map of column key -> sequence of values into a lazy sequence
  of row maps, one per position. The number of rows equals the length of the
  shortest column. Returns an empty sequence when `cols` has no columns."
  [cols]
  (if (empty? cols)
    ;; With no columns, `(apply map f nil)` would build a transducer instead
    ;; of a sequence, so answer the empty case directly.
    ()
    (let [ks (keys cols)]
      ;; `keys` and `vals` of the same map are in matching order, so the i-th
      ;; value picked from each column lines up with the i-th key.
      (apply map (fn [& row-vals] (zipmap ks row-vals)) (vals cols)))))

(cols->rows byCol)

; ({:x 1, :y "a"} {:x 2, :y "b"} {:x 3, :y "c"})

;; APPROACH #2 - HURI LIBRARY
(h/row-oriented byCol)

; ({:x 1, :y "a"} {:x 2, :y "b"} {:x 3, :y "c"})

;; APPROACH #3 - CORE.MATRIX LIBRARY
(d/row-maps (d/dataset (keys byCol) byCol))

; [{:x 1, :y "a"} {:x 2, :y "b"} {:x 3, :y "c"}]

;; APPROACH #4 - INCANTER LIBRARY
;; Build an Incanter dataset from the column names and the transposed values,
;; then read its row maps by field name (:rows) instead of by position with
;; `(second (vals dataset))`, which depends on the order of the dataset's fields.
(:rows (i/dataset (keys byCol) (apply map list (vals byCol))))

; ({:x 1, :y "a"} {:x 2, :y "b"} {:x 3, :y "c"})

Two Ways to Select Rows in Incanter

user=> (use '(incanter core io))
nil

user=> (def iris (read-dataset "../data/iris.dat" :header true :delim \space))
#'user/iris

user=> (sel iris :rows (range 3))
[:Sepal.Length :Sepal.Width :Petal.Length :Petal.Width :Species]
[5.1 3.5 1.4 0.2 "setosa"]
[4.9 3.0 1.4 0.2 "setosa"]
[4.7 3.2 1.3 0.2 "setosa"]

;; METHOD 1 - USING $WHERE 
user=> ($where {:Species {:in #{"virginica" "setosa"}} :Sepal.Length {:gt 5.5, :lt 6.0}} iris)
[:Sepal.Length :Sepal.Width :Petal.Length :Petal.Width :Species]
[5.8 4.0 1.2 0.2 "setosa"]
[5.7 4.4 1.5 0.4 "setosa"]
[5.7 3.8 1.7 0.3 "setosa"]
[5.8 2.7 5.1 1.9 "virginica"]
[5.7 2.5 5.0 2.0 "virginica"]
[5.8 2.8 5.1 2.4 "virginica"]
[5.6 2.8 4.9 2.0 "virginica"]
[5.8 2.7 5.1 1.9 "virginica"]
[5.9 3.0 5.1 1.8 "virginica"]

;; METHOD 2 - USING QUERY-DATASET
user=> (query-dataset iris {:Species {:in #{"virginica" "setosa"}} :Sepal.Length {:gt 5.5, :lt 6.0}})
[:Sepal.Length :Sepal.Width :Petal.Length :Petal.Width :Species]
[5.8 4.0 1.2 0.2 "setosa"]
[5.7 4.4 1.5 0.4 "setosa"]
[5.7 3.8 1.7 0.3 "setosa"]
[5.8 2.7 5.1 1.9 "virginica"]
[5.7 2.5 5.0 2.0 "virginica"]
[5.8 2.8 5.1 2.4 "virginica"]
[5.6 2.8 4.9 2.0 "virginica"]
[5.8 2.7 5.1 1.9 "virginica"]
[5.9 3.0 5.1 1.8 "virginica"]

Read CSV Data File with Clojure

;; LOAD PACKAGES
user=> (use '(incanter core io))
nil

;; READ CSV FILE
user=> (def ds1 (read-dataset "../data/credit_count.csv" :header true :delim \,))
#'user/ds1

;; EXAMINE THE DATA
user=> ($ (range 0 3) :all ds1)
[:CARDHLDR :DEFAULT :AGE :ACADMOS :ADEPCNT :MAJORDRG :MINORDRG :OWNRENT :INCOME :SELFEMPL :INCPER :EXP_INC :SPENDING :LOGSPEND ]
[0 0 27.25 4 0 0 0 0 1200 0 18000 6.667E-4 " " "  "]
[0 0 40.8333321 111 3 0 0 1 4000 0 13500 2.222E-4 " " "  "]
[1 0 37.6666679 54 3 0 0 1 3666.6666667 0 11300 0.0332699 121.9896773 4.8039364]

;; CALCULATE SUMMARY BY GROUP
user=> ($rollup :mean :INCOME [:CARDHLDR :DEFAULT] ds1)
[:DEFAULT :CARDHLDR :INCOME]
[1 1 2156.117553547691]
[0 1 2653.2908642884945]
[0 0 2165.1530843234673]

Read Space-Delimited File with Clojure

;; load clojure libraries
user=> (use 'incanter.core 'incanter.io)
nil

;; import data file
user=> (def iris (read-dataset "../data/iris.dat" :header true :delim \space))
#'user/iris

;; show data header
user=> (col-names iris)
[:Sepal.Length :Sepal.Width :Petal.Length :Petal.Width :Species]

;; show first 3 data records with $
user=> ($ (range 0 3) :all iris)
[:Sepal.Length :Sepal.Width :Petal.Length :Petal.Width :Species]
[5.1 3.5 1.4 0.2 "setosa"]
[4.9 3.0 1.4 0.2 "setosa"]
[4.7 3.2 1.3 0.2 "setosa"]