Skip to content

Commit

Permalink
Add code to fetch a book and its images from Alfresco
Browse files Browse the repository at this point in the history
  • Loading branch information
egli committed Sep 25, 2024
1 parent faba406 commit 8833f20
Showing 1 changed file with 61 additions and 43 deletions.
104 changes: 61 additions & 43 deletions src/clj/daisyproducer2/documents/alfresco.clj
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@

(:require [clj-http.client :as client]
[daisyproducer2.config :refer [env]]
[cheshire.core :as json]))
[cheshire.core :as json])
(:use [slingshot.slingshot :only [try+ throw+]]))

(defn- extract-paginated-result
"Extract `count` and `id` from a paginated result as returned from the Alfresco REST API"
Expand All @@ -21,6 +22,23 @@
id (-> entries first :entry :id)]
[id count]))

(defn- book
  "Return the id of the book node for a given `isbn`.

  Posts a CMIS query to the search endpoint of the Alfresco instance
  configured under `(env :alfresco)`, looking for `sbs:buch` nodes
  below the archive path whose `sbs:pISBN` equals `isbn`.

  Throws an `ex-info` with `:error-id :multiple-books-in-archive`
  unless exactly one book is found. Note that this also covers the
  case where no book is found at all."
  [isbn]
  (let [{:keys [url user password]} (env :alfresco)
        ;; The ISBN ends up inside a single-quoted CMIS string literal.
        ;; Backslash-escape backslashes and single quotes so that the
        ;; value cannot terminate the literal and change the query.
        safe-isbn (-> (str isbn)
                      (.replace "\\" "\\\\")
                      (.replace "'" "\\'"))
        query (format "select * from sbs:buch where sbs:pISBN = '%s' AND CONTAINS('PATH:\"/app:company_home/cm:Produktion/cm:Archiv//*\"')" safe-isbn)
        query-body (json/generate-string {:query {:query query :language "cmis"}})
        [id count] (extract-paginated-result
                    (client/post (str url "/search/versions/1/search")
                                 {:as :json
                                  :basic-auth [user password]
                                  :body query-body}))]
    (if (= count 1)
      id
      (throw
       ;; the message reports the ISBN as given by the caller, not the escaped form
       (ex-info (format "%s books in archive for ISBN '%s'" count isbn)
                {:error-id :multiple-books-in-archive})))))

(defn- product
"Return the id of the product node for a given `product-id`"
[product-id]
Expand Down Expand Up @@ -68,55 +86,55 @@
(ex-info (format "more than one daisy-file in archive for node '%s'" node-id)
{:error-id :multiple-parents-in-archive}))))))

(defn- latest-version
"Return the version id of the latest version for a given `node-id`"
(defn- content
"Return the content for a given `node-id`"
[node-id]
(let [{:keys [url user password]} (env :alfresco)]
(first (extract-paginated-result
(client/get (str url "/alfresco/versions/1/nodes/" node-id "/versions")
{:as :json
:basic-auth [user password]
:query-params {"maxItems" "1"}})))))
(-> (str url "/alfresco/versions/1/nodes/" node-id "/content")
(client/get {:basic-auth [user password]})
:body)))

(defn- content
"Return the content for a given `node-id` and `version-id`"
[node-id version-id]
(defn- content-stream
"Return the content stream for a given `node-id`"
[node-id]
(let [{:keys [url user password]} (env :alfresco)]
(:body (client/get (str url "/alfresco/versions/1/nodes/" node-id "/versions/" version-id "/content")
{:basic-auth [user password]}))))
(-> (str url "/alfresco/versions/1/nodes/" node-id "/content")
(client/get {:as :stream :basic-auth [user password]})
:body)))

(defn- images
"Return a list of image node-ids for a given book `node-id`"
"Return a list of images for a given book `node-id`. The entries are
in the form of a map `{:id :name}`"
[node-id]
(let [{:keys [url user password]} (env :alfresco)]
(-> (str url "/alfresco/versions/1/nodes/" node-id "/children")
(client/get
{:as :json
:basic-auth [user password]
:query-params {"relativePath" "Bilder"
"where" "(nodeType='sbs:graphic')"
;; ignore the fact that this is paginated content. Just fetch
;; lots of items so that we most likely get them all
"maxItems" 5000
"fields" "id,content"}})
(get-in [:body :list :entries])
(->>
(filter (fn [item] (= (get-in item [:entry :content :mimeType]) "image/jpeg")))
(map #(get-in % [:entry :id]))))))

(defn- image-content [ids]
(let [{:keys [url user password]} (env :alfresco)]
(client/post (str url "/alfresco/versions/1/downloads/")
{:as :json
:basic-auth [user password]
:body (json/generate-string {:nodeIds (apply vector ids)})})
;; grab the id of the download from the response
;; wait until the download is ready, i.e. the response contains :status "DONE"
;; then fetch the content
;; all of this probably asynchronously
))
(try+
(-> (str url "/alfresco/versions/1/nodes/" node-id "/children")
(client/get
{:as :json
:basic-auth [user password]
:query-params {"relativePath" "Bilder"
"where" "(nodeType='sbs:graphic')"
;; ignore the fact that this is paginated content. Just fetch
;; lots of items so that we most likely get them all
"maxItems" 5000
"fields" "id,content,name"}})
(get-in [:body :list :entries])
(->>
(filter (fn [item] (= (get-in item [:entry :content :mimeType]) "image/jpeg")))
(map (fn [{entry :entry}] (select-keys entry [:id :name])))))
(catch [:status 404] ;; return an empty list if there was a 404, i.e. there is no "Bilder" folder
[])
(catch Object _ (throw+)))))

(defn content-for-product [product-id]
(let [daisy-file-node (-> product-id product parent daisy-file)
version (latest-version daisy-file-node)]
(content daisy-file-node version)))
(let [daisy-file-node (-> product-id product parent daisy-file)]
(content daisy-file-node)))

(defn content-for-isbn
  "Return the content of the daisy file belonging to the book with the
  given `isbn`. Resolves the book node for `isbn`, then its daisy-file
  node, and fetches the content of that node."
  [isbn]
  (-> isbn
      book
      daisy-file
      content))

(defn images-for-isbn
  "Return a lazy seq of maps `{:name :content}`, one per image of the
  book with the given `isbn`. `:content` is whatever `content-stream`
  returns for the image's node id; it is fetched when the
  corresponding element of the seq is realized."
  [isbn]
  (let [image->entry (fn [image]
                       {:name (:name image)
                        :content (content-stream (:id image))})]
    (map image->entry (images (book isbn)))))

0 comments on commit 8833f20

Please sign in to comment.