From 1768e79dea0b0258c42305357ff287678091f41c Mon Sep 17 00:00:00 2001 From: Lianet Sepulveda Torres Date: Fri, 30 Aug 2024 12:50:44 -0400 Subject: [PATCH] Keeping the features and Solr architecture we will use. * Solr6 standalone mode => current production architecture * Solr8 cloud mode with external zookeeper * Python application running in docker to manage configsets and collections * Bash script to index a sample of documents * Updated the git workflow to build the image used in docker and kubernetes. --- .github/workflows/build-manual.yaml | 36 +- README.md | 552 +++++++++++------- docker-compose.yml | 81 +++ docker-compose_embedded_zooKeeper.yml | 21 - docker-compose_external_zooKeeper.yml | 126 ---- docker-compose_solr6_standalone.yml | 3 - indexing_data.sh | 8 +- init_solr_manager.sh | 17 + .../Dockerfile | 16 +- .../conf/1000common.txt | 0 .../conf/BM25/similarity.xml | 0 .../conf/configoverlay.json | 0 .../indexing_core_specific/1/mergePolicy.xml | 0 .../indexing_core_specific/10/mergePolicy.xml | 0 .../indexing_core_specific/11/mergePolicy.xml | 0 .../indexing_core_specific/12/mergePolicy.xml | 0 .../indexing_core_specific/2/mergePolicy.xml | 0 .../indexing_core_specific/3/mergePolicy.xml | 0 .../indexing_core_specific/4/mergePolicy.xml | 0 .../indexing_core_specific/5/mergePolicy.xml | 0 .../indexing_core_specific/6/mergePolicy.xml | 0 .../indexing_core_specific/7/mergePolicy.xml | 0 .../indexing_core_specific/8/mergePolicy.xml | 0 .../indexing_core_specific/9/mergePolicy.xml | 0 .../old/1000common_deprecated_2020Feb.txt | 0 .../conf/schema.xml | 0 .../conf/serve/mergePolicy.xml | 0 .../conf/solrconfig.xml | 0 .../conf/tfidf/similarity.xml | 0 .../init_files/solr_init.sh | 0 .../lib/HTPostingsFormat.jar | Bin .../lib/icu4j-62.1.jar | Bin .../lib/lucene-analyzers-icu-8.2.0.jar | Bin ...cene-umich-solr-filters-3.0-solr-8.8.2.jar | Bin .../security.json | 0 .../collection_manager.sh | 13 - .../init_files/solr_init.sh | 26 - .../collection_manager.sh | 7 - 
.../solr_standalone_mode/core.properties | 5 - solr_manager/README.md | 17 +- solr_manager/docker-compose.yml | 182 ------ solr_manager/init_solr_manager.sh | 17 - solr_manager/solr_collection_manager.py | 2 +- solr_manager/solr_collection_manager_test.py | 4 +- solr_manager/{solr_files => tests}/conf.zip | Bin 29610 -> 29610 bytes 45 files changed, 476 insertions(+), 657 deletions(-) create mode 100644 docker-compose.yml delete mode 100644 docker-compose_embedded_zooKeeper.yml delete mode 100644 docker-compose_external_zooKeeper.yml create mode 100755 init_solr_manager.sh rename {solr8.11.2_files => solr8.11.2_cloud}/Dockerfile (61%) rename {solr8.11.2_files => solr8.11.2_cloud}/conf/1000common.txt (100%) rename {solr8.11.2_files => solr8.11.2_cloud}/conf/BM25/similarity.xml (100%) rename {solr8.11.2_files => solr8.11.2_cloud}/conf/configoverlay.json (100%) rename {solr8.11.2_files => solr8.11.2_cloud}/conf/indexing_core_specific/1/mergePolicy.xml (100%) rename {solr8.11.2_files => solr8.11.2_cloud}/conf/indexing_core_specific/10/mergePolicy.xml (100%) rename {solr8.11.2_files => solr8.11.2_cloud}/conf/indexing_core_specific/11/mergePolicy.xml (100%) rename {solr8.11.2_files => solr8.11.2_cloud}/conf/indexing_core_specific/12/mergePolicy.xml (100%) rename {solr8.11.2_files => solr8.11.2_cloud}/conf/indexing_core_specific/2/mergePolicy.xml (100%) rename {solr8.11.2_files => solr8.11.2_cloud}/conf/indexing_core_specific/3/mergePolicy.xml (100%) rename {solr8.11.2_files => solr8.11.2_cloud}/conf/indexing_core_specific/4/mergePolicy.xml (100%) rename {solr8.11.2_files => solr8.11.2_cloud}/conf/indexing_core_specific/5/mergePolicy.xml (100%) rename {solr8.11.2_files => solr8.11.2_cloud}/conf/indexing_core_specific/6/mergePolicy.xml (100%) rename {solr8.11.2_files => solr8.11.2_cloud}/conf/indexing_core_specific/7/mergePolicy.xml (100%) rename {solr8.11.2_files => solr8.11.2_cloud}/conf/indexing_core_specific/8/mergePolicy.xml (100%) rename {solr8.11.2_files => 
solr8.11.2_cloud}/conf/indexing_core_specific/9/mergePolicy.xml (100%) rename {solr8.11.2_files => solr8.11.2_cloud}/conf/old/1000common_deprecated_2020Feb.txt (100%) rename {solr8.11.2_files => solr8.11.2_cloud}/conf/schema.xml (100%) rename {solr8.11.2_files => solr8.11.2_cloud}/conf/serve/mergePolicy.xml (100%) rename {solr8.11.2_files => solr8.11.2_cloud}/conf/solrconfig.xml (100%) rename {solr8.11.2_files => solr8.11.2_cloud}/conf/tfidf/similarity.xml (100%) rename {solr8.11.2_files/solrCloud_external_zooKeeper => solr8.11.2_cloud}/init_files/solr_init.sh (100%) rename {solr8.11.2_files => solr8.11.2_cloud}/lib/HTPostingsFormat.jar (100%) rename {solr8.11.2_files => solr8.11.2_cloud}/lib/icu4j-62.1.jar (100%) rename {solr8.11.2_files => solr8.11.2_cloud}/lib/lucene-analyzers-icu-8.2.0.jar (100%) rename {solr8.11.2_files => solr8.11.2_cloud}/lib/lucene-umich-solr-filters-3.0-solr-8.8.2.jar (100%) rename {solr8.11.2_files => solr8.11.2_cloud}/security.json (100%) delete mode 100755 solr8.11.2_files/solrCloud_embedded_zooKeeper/collection_manager.sh delete mode 100644 solr8.11.2_files/solrCloud_embedded_zooKeeper/init_files/solr_init.sh delete mode 100755 solr8.11.2_files/solrCloud_external_zooKeeper/collection_manager.sh delete mode 100644 solr8.11.2_files/solr_standalone_mode/core.properties delete mode 100644 solr_manager/docker-compose.yml delete mode 100755 solr_manager/init_solr_manager.sh rename solr_manager/{solr_files => tests}/conf.zip (99%) diff --git a/.github/workflows/build-manual.yaml b/.github/workflows/build-manual.yaml index efc548b..b0d75d2 100644 --- a/.github/workflows/build-manual.yaml +++ b/.github/workflows/build-manual.yaml @@ -32,43 +32,27 @@ jobs: platforms: linux/amd64,linux/arm64 push: true tags: ghcr.io/hathitrust/lss-solr:unstable - - - name: Build and push - solr 8, standalone - uses: docker/build-push-action@v5 - with: - context: . 
- file: ./solr8.11.2_files/Dockerfile - platforms: linux/amd64,linux/arm64 - push: true - tags: ghcr.io/hathitrust/full-text-search-solr:example-8.11 - target: standalone - - name: Build and push - solr 8, embedded zookeeper - uses: docker/build-push-action@v5 - with: - context: . - file: ./solr8.11.2_files/Dockerfile - platforms: linux/amd64,linux/arm64 - push: true - tags: ghcr.io/hathitrust/full-text-search-embedded_zoo:example-8.11 - target: embedded_zookeeper - - - name: Build and push - solr 8, external zookeeper + # use this image to start up a solr cluster in docker-compose + - + name: Build and push - solr 8, external zookeeper uses: docker/build-push-action@v5 with: context: . file: ./solr8.11.2_files/Dockerfile platforms: linux/amd64,linux/arm64 push: true - tags: ghcr.io/hathitrust/full-text-search-cloud:example-8.11 - target: external_zookeeper + tags: ghcr.io/hathitrust/full-text-search-cloud:shards-docker + target: external_zookeeper_docker - - name: Build and push - solr 8, external zookeeper + # use this image to start up a solr cluster in Kubernetes + - + name: Build and push - solr 8, external zookeeper uses: docker/build-push-action@v5 with: context: . file: ./solr8.11.2_files/Dockerfile platforms: linux/amd64,linux/arm64 push: true - tags: ghcr.io/hathitrust/full-text-search-cloud:shards-8.11 - target: external_zookeeper_kubernetes + tags: ghcr.io/hathitrust/full-text-search-cloud:shards-kubernetes + target: common diff --git a/README.md b/README.md index 2d458d9..03e7a6b 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,148 @@ -# lss_solr_configs -Configuration files for HT full-text search (ls) Solr +
+

+ lss_solr_configs +
+
+ Report Bug + - + Request Feature +

+ +## Table Of Contents + +* [About the Project](#about-the-project) +* [Built With](#built-with) +* [Phases](#phases) +* [Project Set Up](#project-set-up) + * [Prerequisites](#prerequisites) + * [Installation](#installation) + * [Creating A Pull Request](#creating-a-pull-request) +* [Content Structure](#content-structure) + * [Project Structure](#project-structure) + * [Site Maps](#site-maps) +* [Design](#design) +* [Functionality](#functionality) +* [Usage](#usage) +* [Tests](#tests) +* [Hosting](#hosting) +* [Resources](#resources) + +## About The Project + +This project is a configuration for Solr 6 and 8 to be used in the HathiTrust full-text search. + +The main problem we are trying to solve is to provide HathiTrust custom architecture to Solr server to deal with: +* Huge indexes that require significant changes to Solr defaults to work; +* Custom indexing to deal with multiple languages and huge documents. + +The initial version of HT Solr cluster runs in Solr 6 in standalone mode. The current proposal of this repository +is to upgrade the Solr server to Solr 8 in cloud mode. However, the Solr 6 server documentation will be here for a +while as legacy and to use it as a reference. + +## Built With + +* [Solr](https://lucene.apache.org/solr/) +* [Docker](https://www.docker.com/) +* [Kubernetes](https://kubernetes.io/) +* [Python](https://www.python.org/) +* [Java](https://www.java.com/) +* [Bash](https://www.gnu.org/software/bash/) +* [XML](https://www.w3.org/XML/) + +## Phases + +The project is divided into four phases. Each phase has a specific goal to achieve. 
+ +* **Phase 1**: Upgrade Solr server from Solr 6 to Solr 8 in cloud mode + * Understand the Solr 6 architecture to migrate to Solr 8 + * Create a docker image for Solr 8 in cloud mode + * Create a docker image for Solr 8 and external Zookeeper + * Create a docker image for Solr 8 and embedded Zookeeper +* **Phase 2**: Index data in Solr 8 and integrate it in [babel-local-dev](https://github.com/hathitrust/babel-local-dev) +and [ht_indexer](https://github.com/hathitrust/ht_indexer) + * Create a script to automate the process of indexing data in Solr 8 +* **Phase 3**: Set up the Solr cluster in Kubernetes with data persistence and security + * Create a Kubernetes deployment for Solr 8 and external Zookeeper + * Deploy the Solr cluster in Kubernetes + * Create a Python module to manage Solr collections and configsets + * Clean up the code and documentation + +## Project Set Up + +### Prerequisites + +* Docker +* Python +* Java + +### Installation + +1. Clone the repo + ``` git clone git@github.com:hathitrust/lss_solr_configs.git ``` -## What is the problem we are trying to solve +2. Start the Solr server in standalone mode + ``` docker-compose -f docker-compose_solr6_standalone.yml up ``` -These files customize Solr for HT full-text search for Solr 6 and 8 and in standalone and cloud mode. -Our very large indexes require significant changes to Solr defaults in order to work. -We also have custom indexing to deal with multiple languages, and very large documents. +3. 
Start the Solr server in cloud mode + ``` docker-compose -f docker-compose.yml up ``` -## [Legacy] Overview Solr 6 +## Content Structure + +### Project Structure + +``` +lss_solr_configs/ +├── solr_manager/ +│ ├── Dockerfile +│ ├── .env +│ ├── solr_init.sh +│ ├── security.json +│ ├── collection_manager.sh +│ └── README.md +├── solr8.11.2_cloud/ +│ ├── Dockerfile +│ ├── solrconfig.xml +│ ├── schema.xml +│ ├── core.properties +│ ├── lib/ +│ └── data/ +├── solr6_standalone/ +├── docker-compose_test.yml +├── docker-compose_solr6_standalone.yml +├── README.md +└── indexing_data.sh +``` -A solr configuration for LSS consists of five symlinks in the same directory -that point to the correct files for that core: +## Design + +* **solr6_standalone**: Contains the Dockerfile and configuration files for Solr 6 in standalone mode. +* **solr8.11.2_cloud**: Contains the Dockerfile and configuration files for Solr 8.11.2 in cloud mode. + * Dockerfile: Dockerfile for building the Solr 8.11.2 cloud image. + * Create the image with the target:**external_zookeeper_docker** to run Solr in Docker. This application uses + the script init_files/solr_init.sh to copy a custom security.json file to initialize Solr and external + Zookeeper using the Basic authentication. + * Create the image with the target: **common** to run Solr in Kubernetes. Solr will start automatically without + the need to run the script solr_init.sh. + +The image will copy files that are relevant to set up the cluster + +* **conf/**: Directory for Solr configuration files. + * solrconfig.xml: Solr configuration file. + * schema.xml: Solr schema file. +* **lib/**: Directory for JAR files. + +* **solr_manager**: Contains the Dockerfile and scripts for managing Solr collections and configurations using Python. + * This application will have access to any Solr server running in Docker or Kubernetes. 
+ * Inside the solr_manager you will see the Dockerfile for building the image to run the Python application and its + documentation. + * To create collections and to upload configsets, Solr requires Admin credentials, then you will need it + to provide the Solr admin password as an environment variable. +* **indexing_data.sh**: Use this script for indexing data into Solr when Solr cluster is running. + +### [Legacy] Overview Solr 6 + +A solr configuration for LSS consists of five symlinks in the same directory that point to the correct +files for that core: Three of these symlinks will point to the same file regardless of what core you're configuring: @@ -31,7 +163,7 @@ scores and are stored in the corresponding directories. We've been linking the tfidf file into `core-#x` and the BM25 into `core-#y` for each of the cores. * `mergePolicy.xml` configures merge variables and ramBuffer size for each - individual core (as [specified in Confluence](https://tools.lib.umich.edu/confluence/display/HAT/Tuning+re-indexing)), + core (as [specified in Confluence](https://tools.lib.umich.edu/confluence/display/HAT/Tuning+re-indexing)), with a goal of making them different enough that it's less likely that many cores will be involved in big merges at the same time. @@ -39,68 +171,34 @@ of the cores. indexing servers should use the core-specific version in the `indexing_core_specific` directory. -### How to start up the Solr 6 server in a standalone mode - -* Launch Solr server - * `docker-compose -f docker-compose_solr6_standalone.yml up` - -* Stop Solr server - * `docker-compose -f docker-compose_solr6_standalone down` - -* Go inside the Solr container - * `docker exec -it solr-lss-dev-8 /bin/bash` - -If you are using Apple Silicon M1 chip, you will get this error -no matching manifest for Linux/arm64/v8 in the -manifest list entries-. 
To fix it, just add this platform in the docker-compose.yml file as shown below: - -`platform: linux/amd64` - -#### How to integrate it in babel-local-dev - -Update _docker-compose.yml_ file inside babel directory replacing the service _solr-lss-dev_. Create a new one with the -following specifications: - -```solr-lss-dev: - image: solr:6.6.6-alpine - ports: - - "8983:8983" - user: ${CURRENT_USER} - volumes: - - ${BABEL_HOME}/lss_solr_configs/solr6_standalone/lss-dev/core-x:/opt/solr/server/solr/core-x - - ${BABEL_HOME}/lss_solr_configs/solr6_standalone/lss-dev/core-y:/opt/solr/server/solr/core-y - - ${BABEL_HOME}/lss_solr_configs/solr6_standalone:/opt/lss_solr_configs - - ${BABEL_HOME}/lss_solr_configs/solr6_standalone/lib:/opt/solr/server/solr/lib - - ${BABEL_HOME}/logs/solr:/opt/solr/server/logs - ``` - -# Overview Solr 8.11.2 in cloud mode +### Overview Solr 8 * Solr8.11.2 is the latest version of the 8.x series -### Upgrading our index from Solr6 to Solr8.11.2 (the last version) - -A Solr configuration for full-text search consists of creating a directory (solrdata) that contains all the -files and directories Solr needs to start up the server in standalone mode and with one core (core-x). +### Upgrading our index from Solr6 to Solr8.11.2 To set up Solr 8, the same logic and resources used with Solr 6 have reused, then, minimal changes were made on JAR files, -Solr schemas and config files. +Solr schemas, and solrconfig.xml files. See below the followed steps to upgrade the server from Solr 6 to Solr 8. -1) **Create a DockerFile to generate our own image. **Solr:8.11.2** was the image used**. - -- (DockerFile) To Solr recognize the cores, a directory with the core name, should be created inside** -the /var/solr/data folder. 
Inside each core directory, should be added: - - solrconfig.xml (configuration file with the most parameters affecting Solr itself) - The solrconfig.xml file is located in the conf/ directory for each collection or core - - data directory - - core.properties -- Solr cores are configured by placing a file named core.properties in a subdirectory - under solr.home. Each core has to be the core.properties field - - conf directory - - lib directory (All the used JARS must add into lib directory) -- **(DockerFile) In /var/sorl/data directory should be added the file solr.xml** - -2) **Copy some of the Java JARS that was already generated in Catalog** +1) **Create a DockerFile to generate our own image. + +- (DockerFile) The image was built using the official Solr image **Solr:8.11.2** + and adding the necessary files to set up the Solr server. We have to ensure the lib (JAR files) + directories are copied to the image. + - The lib directory contains the JAR files. + - The folder conf, that contains the configuration files used by Solr to index the documents, such as: + - schema.xml: The schema file that defines the fields and types of the documents + - solrconfig.xml: The configuration file that defines the handlers and configurations of the Solr server + - security.json: The security file that defines the authentication to access the Solr server +- The image was built with the target **external_zookeeper_docker** + to run Solr in Docker. This application uses the script init_files/solr_init.sh to copy a custom security.json file to + initialize Solr and external Zookeeper using the Basic authentication. + The image was built with the target **common** to run Solr in Kubernetes. +Solr will start automatically without the need to run the script solr_init.sh. 
+ +2) **Copy some of the Java JARS that were already generated in Catalog** - icu4j-62.1.jar - lucene-analyzers-icu-8.2.0.jar - lucene-umich-solr-filters-3.0-solr-8.8.2.jar @@ -108,125 +206,159 @@ the /var/solr/data folder. Inside each core directory, should be added: 3) **Upgrading the JAR: HTPostingsFormatWrapper** (Check [here](https://github.com/hathitrust/lss_java_code/tree/master) to see all the steps to re-generate this JAR) 4) **Updating schema.xml** - -* _root_ field is type=int in Solr6 and type=string in Solr8. In Solr 8 _root_ field must be defined using the exact same fieldType as the uniqueKey field (id) uses: string - -5) **Create a docker-compose file to start up Solr server and for indexing data**. - - -# Overview Solr 8.11.2 in cloud mode: - -* On docker, the SolrCloud is a single replica and a single shard. If you want to add more you should copy solr and -zookeeper services in the docker-compose file. -* The server was set up combining Solr, ZooKeeper and Docker -* For development and production, we use an external ZooKeeper server + * _root_ field is type=int in Solr6 and type=string in Solr8. In Solr 8 _root_ field must be defined + using the exact same fieldType as the uniqueKey field (id) uses: string +5) **Updating solr8.11.2_cloud/conf/solrconfig.xml** + * This file has been updated along with this project. The date of each update was added in the file to track the changes. +6) **Create a docker-compose file to start up Solr server and for indexing data**. + +* On docker, the SolrCloud is a single replica and a single shard. + If you want to add more nodes of Solr and Zookeeper, you should copy solr and +zookeeper services in the docker-compose file. +Remember to update the port of each service. 
+ +Although different architectures to set up Solr cloud have tested, the best option is to use an external +Zookeeper server because: + +* It is the recommended architecture for production environment; +* It is more stable and secure (In our solution, authentication is applied); +* It is easier to manage the Solr cluster and Zookeeper separately; +* It is easier to scale the Solr cluster. + +### Functionality + +In the docker-compose file, the address (a string) where ZooKeeper is running is defined, this way Solr is able +to connect to ZooKeeper server. +Additional environment variables have been added to ensure the Solr server starts up. +On this [page](https://hathitrust.atlassian.net/wiki/spaces/HAT/pages/3190292502/Solr+cloud+and+external+Zookeeper+parameters), +You can find a detail explanation of the environment variables used to set up Solr and Zookeeper in a docker. + +When the Solr cluster starts up, it is empty. +To upload configset and create new collections, the +[Solr API](https://solr.apache.org/guide/8_11/collections-api.html) is used for. +In this repository, +the Python package **solr_manager** is based on Solr collection API to manage Solr collections and configsets. On docker, to start up the Solr server in cloud mode, we mount the script init_files/solr_init.sh in the container to allow setting up the authentication using a predefined security.json file. - -The script specifies the ZK_HOST environment variable to point to the ZooKeeper server. It also copies the -security.json file to ZooKeeper using the solr zk cp command. In the docker-compose file, each Solr container should +It also copies the security.json file to ZooKeeper using the solr zk cp command. +In the docker-compose file, each Solr container should run a command to start up the Solr in foreground mode. -In the container, we should define health checks to verify the Zookeeper and Solr are working well. 
These health checks +In the container, we should define health checks to verify the Zookeeper and Solr are working well. +These health checks will help us to define the dependencies between the services in the docker-compose file. If we do not use the health checks, we probably will have to use the scripts wait-for-solr.sh and wait-for-zookeeper.sh to make sure the authentication is set up correctly. -On Kubernetes, none script is necessary to set up the authentication, because we use the -default Kubernetes secrets created by the operator. +On Kubernetes, none script is necessary to set up the authentication because the Solr operator +will create the secrets by default. +### Usage -### Upgrading our index from Solr6 to Solr8.11.2 (the last version) +#### How to start up the Solr 6 server in a standalone mode -Different architectures have tested to set up solr in cloud mode. +* Launch Solr server + * `docker-compose -f docker-compose_solr6_standalone.yml up` + +* Stop Solr server + * `docker-compose -f docker-compose_solr6_standalone down` -* Option 1: Version 8.11.2 Solr embedded ZooKeeper instance -* Option 2: Version 8.11.2 Solr and an external Zookeeper ensemble +* Go inside the Solr container + * `docker exec -it solr-lss-dev-8 /bin/bash` -## Solr 8.11.2 and an external Zookeeper +If you are using Apple Silicon M1 chip, you will get this error `-no matching manifest for Linux/arm64/v8 in the +manifest list entries-`. 
+To fix it, add this platform in the docker-compose.yml file as shown below: -* This is the recommended architecture for production environment -* In our solution, authentication is applied -* The integration in babel-local-dev repository is more verbose -* In the docker-compose file, the address (a string) where ZooKeeper is running is defined, this way Solr is able -to connect to ZooKeeper server -* Use [Solr API](https://solr.apache.org/guide/8_11/collections-api.html) for creating and set up the collection +`platform: linux/amd64` -To a better understanding of Solr and Zookeeper set up in a docker, see this [page](https://hathitrust.atlassian.net/wiki/spaces/HAT/pages/3190292502/Solr+cloud+and+external+Zookeeper+parameters). +#### How to integrate it in babel-local-dev -### How to create the image for Solr and external Zookeeper +Update _docker-compose.yml_ file inside babel directory replacing the service _solr-lss-dev_. Create a new one with the +following specifications: + +```solr-lss-dev: + image: solr:6.6.6-alpine + ports: + - "8983:8983" + user: ${CURRENT_USER} + volumes: + - ${BABEL_HOME}/lss_solr_configs/solr6_standalone/lss-dev/core-x:/opt/solr/server/solr/core-x + - ${BABEL_HOME}/lss_solr_configs/solr6_standalone/lss-dev/core-y:/opt/solr/server/solr/core-y + - ${BABEL_HOME}/lss_solr_configs/solr6_standalone:/opt/lss_solr_configs + - ${BABEL_HOME}/lss_solr_configs/solr6_standalone/lib:/opt/solr/server/solr/lib + - ${BABEL_HOME}/logs/solr:/opt/solr/server/logs + ``` + +#### How to start up the Solr 8 server in clode mode with external Zookeeper ``` -cd lss_solr_configs -export IMAGE_REPO=ghcr.io/hathitrust/full-text-search-external_zoo -docker build . 
--file solr8.11.2_files/Dockerfile --target external_zookeeper --tag $IMAGE_REPO:ext_zoo_8.11.2 -docker image tag xt_zoo_8.11.2:latest ghcr.io/hathitrust/full-text-search-external_zoo:shards-8.11 -docker image push ghcr.io/hathitrust/full-text-search-external_zoo:shards-8.11 +docker-compose -f docker-compose.yml up ``` -In Kubernetes, you should use a multiple platform image to run the Solr server. The recommendation is use the [github -actions workflow](https://github.com/hathitrust/lss_solr_configs/actions) to create the image for the different platforms. +* Start up the Solr server in cloud mode with external Zookeeper. The following services will run in the docker-compose file: + * solr1 + * zoo1 +In the folder .github/workflows, there is a workflow to create the image for Solr and external Zookeeper. +This workflow creates the image for the different platforms +(linux/amd64, linux/arm64, linux/arm/v7) and pushes the image to the +GitHub container registry. +You should use this image to start up the Solr server in Kubernetes. -lss_solr_configs repository -automatically generates the image for the different platforms on the git actions workflow. +In Kubernetes, you should use a multiple platform image to run the Solr server. +The recommendation is use the [github actions workflow](https://github.com/hathitrust/lss_solr_configs/actions) +to create the image for the different platforms. -If you are doing changes in the Dockerfile or in the solr_init.sh script it is better to create the image +If you are doing changes in the Dockerfile or in the solr_init.sh script, it is better to create the image each time you run the docker-compose file instead of using the image in the repository. Update the solr service adding the following lines: - -```build: - context: . - dockerfile: ./solr8.11.2_files/Dockerfile - target: external_zookeeper +``` + build: + context: . 
+ dockerfile: solr8.11.2_cloud/Dockerfile + target: external_zookeeper_docker ``` -You should use the created image in the docker-container to start up `full-text-search-external_zoo` service - -### How to start up the Solr server and use it to create the collection and configset - -This application uses the script solrCloud_external_zooKeeper/init_files/solr_init.sh to start up the Solr cluster -with Basic authentication and uploading the configset used in the collection with fulltext documents. - -#### Command to start up the Solr cluster in cloud mode - `docker compose -f docker-compose_external_zooKeeper.yml up` - -You will see the following services running in the docker: -* full-text-search-external_zoo -* zoo1 - -You could use this option if you want to use Solr with any other application. +* [For testing] Manually create the Solr image with external Zookeeper -#### Command to start up the Solr cluster in cloud mode and create the collection and configset +``` +cd lss_solr_configs +export IMAGE_REPO=ghcr.io/hathitrust/full-text-search-cloud +docker build . --file solr8.11.2_files/Dockerfile --target external_zookeeper_docker --tag $IMAGE_REPO:shards-docker +docker image tag shards-docker:latest ghcr.io/hathitrust/full-text-search-cloud:shards-docker +docker image push ghcr.io/hathitrust/full-text-search-cloud:shards-docker +``` -The docker-compose (docker-compose_external_zooKeeper.yml) file contains additional services you can use with -Solr cluster if you start the docker using `--profile` option. +#### How to run the application to manage collections and configset -* Use the command below to create the collection using core-x configset. +The service to manage collections is defined in the docker-compose.yml. +As it is dependent on Solr, for convenience, it +is in the same docker-compose file. 
+However, once the solr_manager container is up, you can use it to manage any +collection in any Solr server running in Docker or Kubernetes, +because it is a Python module that receives the Solr URL +as a parameter. +You will have to pass the admin password to create collections and upload configsets. ``` export SOLR_PASSWORD='solr-admin-password' -docker compose -f docker-compose_external_zooKeeper.yml --profile collection_creator up +docker compose -f docker-compose.yml --profile solr_collection_manager up ``` -`collection_creator` is a service that will create the collection `core-x` using the already -created configset `core-x`. The configset core-x is uploaded when Solr server starts up. -To create collections, solr-admin-password is required; then it should be passed as an environment variable. - -* The following service will be created in the docker-compose file - * full-text-search-external_zoo +Using `--profile` option in the docker-compose file, you can start up the following services + * solr1 * zoo1 - * collection_creator + * solr_manager + +Read solr_manager/README.md to see how to use this module. + +#### How to run the application to create the Solr cluster with one collection -* Use the command below to start up the Solr cluster in cloud mode and manage Solr collections and configset using -the python module solr_manager. Read solr_manager/README.md to see how to use this module. 
-``` -docker compose -f docker-compose_external_zooKeeper.yml --profile solr_collection_manager up -``` #### How to integrate it in babel-local-dev @@ -234,11 +366,11 @@ Update _docker-compose.yml_ file inside babel directory replacing the service _s following specifications ```solr-lss-dev: - image: ghcr.io/hathitrust/full-text-search-external_zoo:shards-8.11 - container_name: full-text-search-external_zoo - ports: - - "8983:8983" - environment: + image: ghcr.io/hathitrust/solr-lss-dev:shards-docker + container_name: solr1 + ports: + - "8981:8983" + environment: - ZK_HOST=zoo1:2181 - SOLR_OPTS=-XX:-UseLargePages networks: @@ -248,13 +380,11 @@ following specifications condition: service_healthy volumes: - solr1_data:/var/solr/data - command: # Solr command to start the container to make sure the security.json is created - - solr-foreground -c + command: solr-foreground -c # Solr command to start the container to make sure the security.json is created healthcheck: - test: [ "CMD", "/usr/bin/curl", "-s", "-f", "http://full-text-search-external_zoo:8983/solr/#/admin/ping" ] - interval: 5s + test: [ "CMD", "/usr/bin/curl", "-s", "-f", "http://solr-lss-dev:8983/solr/#/admin/ping" ] + interval: 30s timeout: 10s - start_period: 30s retries: 5 zoo1: image: zookeeper:3.8.0 @@ -269,38 +399,44 @@ following specifications ZOO_SERVERS: server.1=zoo1:2888:3888;2181 ZOO_4LW_COMMANDS_WHITELIST: mntr, conf, ruok ZOO_CFG_EXTRA: "metricsProvider.className=org.apache.zookeeper.metrics.prometheus.PrometheusMetricsProvider metricsProvider.httpPort=7000 metricsProvider.exportJvmInfo=true" - ZOO_LOG_DIR: "/logs" networks: - solr volumes: - - zookeeper1_log:/logs # The log directory is used to store the Zookeeper logs - - zookeeper1_data:/data # The data directory is used to store the Zookeeper data - - zookeeper1_datalog:/datalog # The datalog directory is used to store the Zookeeper transaction logs + - zoo1_data:/data healthcheck: - test: [ "CMD", "/usr/bin/curl", "-s", "-f", 
"http://solr1:8983/solr/#/admin/ping" ] + test: [ "CMD", "echo", "ruok", "|", "nc", "localhost", "2181", "|", "grep", "imok" ] interval: 30s timeout: 10s retries: 5 - collection_creator: - container_name: collection_creator - build: - context: . - dockerfile: ./solr8.11.2_files/Dockerfile - target: external_zookeeper - entrypoint: [ "/bin/sh", "-c" ,"/var/solr/data/collection_manager.sh http://full-text-search-external_zoo:8983"] - volumes: - - solr1_data:/var/solr/data - depends_on: - full-text-search-external_zoo: - condition: service_healthy - networks: - - solr - profiles: [create_collections] - environment: - - SOLR_PASSWORD=$SOLR_PASSWORD + solr_manager: + build: + context: solr_manager + target: runtime + dockerfile: Dockerfile + args: + UID: ${UID:-1000} + GID: ${GID:-1000} + ENV: ${ENV:-dev} + POETRY_VERSION: ${POETRY_VERSION:-1.5.1} + SOLR_PASSWORD: ${SOLR_PASSWORD:-solr} + SOLR_USER: ${SOLR_USER:-solrRocks} + ZK_HOST: ${ZK_HOST:-zoo1:2181} + env_file: + - solr_manager/.env + volumes: + - .:/app + stdin_open: true + depends_on: + solr-lss-dev: + condition: service_healthy + tty: true + container_name: solr_manager + networks: + - solr + profiles: [ solr_collection_manager ] ``` -You might add the volume following list of volume to the docker-compose file. +You might add the following list of volume to the docker-compose file. ```solr1_data: @@ -315,12 +451,26 @@ You might add the volume following list of volume to the docker-compose file. * docker-compose build * docker-compose up -* and to create the collection and index documents in full-text search server use the command below +* To create the collection in full-text search server, use the command below * `docker exec solr-lss-dev /var/solr/data/collection_manager.sh` - -## How to index data using a sample of documents -Follow the steps below for indexing a sample of documents in Solr server. 
+* To index data in full-text search server use the command below
+  * `./indexing_data.sh http://localhost:8983 solr_pass ~/mydata data_sample.zip core-x`
+
+
+## Hosting
+
+The Solr server is hosted in Kubernetes.
+
+Find [here](https://hathitrust.atlassian.net/wiki/spaces/HAT/pages/3163717633/Steps+to+start+up+the+Solr+cluster+in+Kubernetes+and+using+Argo+CD)
+a detailed explanation of how the Solr server was set up in Kubernetes
+
+Fulltext search Solr cluster Argocd application:
+https://argocd.ictc.kubernetes.hathitrust.org/applications/argocd/fulltext-workshop-solrcloud?resource=
+
+## Resources
+
+### How to index data using a sample of documents
 
 The sample of data is in `macc-ht-ingest-000.umdl.umich.edu:/htprep/fulltext_indexing_sample/data_sample.zip`
 
@@ -329,12 +479,18 @@ The sample of data is in `macc-ht-ingest-000.umdl.umich.edu:/htprep/fulltext_ind
 
 * In your working directory,
   * After starting up the Solr server inside the docker,
-    * run the script `./indexing_data.sh`.
+    * run the script indexing_data.sh. You will need the admin password for doing that.
+
+      ```./indexing_data.sh http://localhost:8983 solr_pass ~/mydata data_sample.zip collection_name```
+
+      The script will extract all the XML files inside the Zip file to a destination folder.
+Then, it will index the documents in Solr server.
+The script input parameters are:
+      * solr_url
+      * solr password
+      * the path to the target folder to extract the files
+      * the path to the zip file with the sample of documents, and the Solr collection name.
 
-      The script will extract all the XML files inside the Zip file to a destine folder. Then, it will index the documents in Solr server.
-The script input parameters are: solr_url, the path to the target folder to extract the files and the path to the zip file.
-      * e.g. `./indexing_data.sh http://localhost:8983 ~/mydata data_sample.zip`
-      At the end of this process, your Solr server should have a sample of 150 documents.
**Note**: If in the future we should automatize this process, a service to index documents could be included in the docker-compose. @@ -355,15 +511,10 @@ You will have to add the data sample to the docker image or download it from a r - solr ``` -## Create a JSON file for indexing data +### Useful commands -The JSON file core-data.json (/solr_json_documents) contains 1.978 generated using the python workflow. These documents are a sample of -the documents indexed in [catalog image](https://github.com/hathitrust/hathitrust_catalog_indexer). - - -## Useful commands - -* Command to create core-x collection. Recommendation: Pass the instanceDir and the dataDir to the curl command +* Command to create core-x collection. +Recommendation: Pass the instanceDir and the dataDir to the curl command * `curl -u solr:SolrRocks "http://localhost:8983/solr/admin/collections?action=CREATE&name=core-x&instanceDir=/var/solr/data/core-x&numShards=1&collection.configName=core-x&dataDir=/var/solr/data/core-x"` * Command to index documents into core-x collection, remove authentication if you do not need it @@ -386,12 +537,6 @@ the documents indexed in [catalog image](https://github.com/hathitrust/hathitrus * Below one can be used through browser to delete documents from Solr index: * `http://host:port/solr/collection_name/update?commit=true&stream.body=*:*` -### Pending & Next steps - -* [Otimization] To simplify the dokerization logic some directories have been duplicated in the different directories to -set up Solr cloud. We could check how the common directories could be added in the root of the repository and re-use -them in the docker files. - ## Deployment and Use Go to section `How to integrate it in babel-local-dev` to see how to integrate each Solr server into another application. 
@@ -402,7 +547,8 @@ Go to section `How to integrate it in babel-local-dev` to see how to integrate e ```ERROR org.apache.solr.cloud.SyncStrategy – No UpdateLog found - cannot sync``` -* In the Solr cloud logs with embedded ZooKeeper you could see the issue below. That is more of a warning than error, and +* In the Solr cloud logs with embedded ZooKeeper, you could see the issue below. +That is more of a warning than error, and it appears because we are running only one ZK in standalone mode. More details of this message [here](https://hathitrust.atlassian.net/wiki/spaces/HAT/pages/edit-v2/2661482577). @@ -413,12 +559,6 @@ message [here](https://hathitrust.atlassian.net/wiki/spaces/HAT/pages/edit-v2/26 ### Production Indexing ### Production Serving - -## Migrating to later versions of Solr/Lucene -This may need a separate document on confluence. Think about it... - -## Background details - ## Considerations for future modification ### Move to AWS @@ -434,4 +574,4 @@ This may need a separate document on confluence. Think about it... * [SolrCloud + ZooKeeper external server & data persistence](https://github.com/samuelrac/solr-cloud) * [Our documentation with more information](https://hathitrust.atlassian.net/wiki/spaces/HAT/pages/edit-v2/2661482577) * [An example of SolrCloud in Catalog](https://github.com/hathitrust/hathitrust_catalog_indexer/blob/main/README.md) -* \ No newline at end of file +* [How to set up Solr cloud cluster in Kubernetes](https://apache.github.io/solr-operator/docs/solr-cloud/solr-cloud-crd.html#prometheus-exporter) diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..ef841fc --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,81 @@ +# a docker-compose.yml to start a simple cluster with 1 ZooKeeper node (external) and 1 Solr nodes. 
+# Check this page (https://hathitrust.atlassian.net/wiki/spaces/HAT/pages/3190292502/Solr+cloud+and+external+Zookeeper+parameters) +# to a better understanding of Solr and Zookeeper set up. +services: + solr_manager: + build: + context: solr_manager + target: runtime + dockerfile: Dockerfile + args: + UID: ${UID:-1000} + GID: ${GID:-1000} + ENV: ${ENV:-dev} + POETRY_VERSION: ${POETRY_VERSION:-1.5.1} + SOLR_PASSWORD: ${SOLR_PASSWORD:-solr} + SOLR_USER: ${SOLR_USER:-solrRocks} + ZK_HOST: ${ZK_HOST:-zoo1:2181} + env_file: + - solr_manager/.env + volumes: + - .:/app + stdin_open: true + depends_on: + solr1: + condition: service_healthy + tty: true + container_name: solr_manager + networks: + - solr + profiles: [ solr_collection_manager ] + solr1: + build: + context: . + dockerfile: solr8.11.2_cloud/Dockerfile + target: external_zookeeper_docker + container_name: solr1 + ports: + - "8983:8983" + environment: + - ZK_HOST=zoo1:2181 + - SOLR_OPTS=-XX:-UseLargePages + networks: + - solr + depends_on: + zoo1: + condition: service_healthy + volumes: + - solr1_data:/var/solr/data + command: solr-foreground -c # Solr command to start the container to make sure the security.json is created + healthcheck: + test: [ "CMD", "/usr/bin/curl", "-s", "-f", "http://solr1:8983/solr/#/admin/ping" ] + interval: 30s + timeout: 10s + retries: 5 + zoo1: + image: zookeeper:3.8.0 + container_name: zoo1 + restart: always + hostname: zoo1 + ports: + - 2181:2181 + - 7001:7000 + environment: + ZOO_MY_ID: 1 + ZOO_SERVERS: server.1=zoo1:2888:3888;2181 + ZOO_4LW_COMMANDS_WHITELIST: mntr, conf, ruok + ZOO_CFG_EXTRA: "metricsProvider.className=org.apache.zookeeper.metrics.prometheus.PrometheusMetricsProvider metricsProvider.httpPort=7000 metricsProvider.exportJvmInfo=true" + networks: + - solr + volumes: + - zoo1_data:/data + healthcheck: + test: [ "CMD", "echo", "ruok", "|", "nc", "localhost", "2181", "|", "grep", "imok" ] + interval: 30s + timeout: 10s + retries: 5 +networks: + solr: +volumes: + 
solr1_data: null + zoo1_data: null \ No newline at end of file diff --git a/docker-compose_embedded_zooKeeper.yml b/docker-compose_embedded_zooKeeper.yml deleted file mode 100644 index a9a2029..0000000 --- a/docker-compose_embedded_zooKeeper.yml +++ /dev/null @@ -1,21 +0,0 @@ -services: - full-text-search-embedded_zoo: - image: full-text-search-embedded_zoo - container_name: full-text-search-embedded_zoo - build: - context: . - dockerfile: ./solr8.11.2_files/Dockerfile - target: embedded_zookeeper - ports: - - "8983:8983" - volumes: - - solr_data:/var/solr/data - command: solr-foreground -c - healthcheck: - test: ["CMD-SHELL", "solr healthcheck -c core-x"] - interval: 5s - timeout: 10s - start_period: 30s - retries: 5 -volumes: - solr_data: diff --git a/docker-compose_external_zooKeeper.yml b/docker-compose_external_zooKeeper.yml deleted file mode 100644 index fad29c6..0000000 --- a/docker-compose_external_zooKeeper.yml +++ /dev/null @@ -1,126 +0,0 @@ -services: - full-text-search-external_zoo: - image: ghcr.io/hathitrust/full-text-search-external_zoo:shards-8.11 - container_name: full-text-search-external_zoo - ports: - - "8983:8983" - environment: - - ZK_HOST=zoo1:2181 # If you want to add more zookeeper servers, you can add them separated. 
- # For example: ZK_HOST=zoo1:2181,zoo2:2181,zoo3:2181 - - SOLR_OPTS=-XX:-UseLargePages # get rid of "shared memory" warnings on Solr - # startup => https://solr.apache.org/guide/solr/latest/deployment-guide/docker-faq.html - networks: - - solr - depends_on: - zoo1: - condition: service_healthy - volumes: - - solr1_data:/var/solr/data - command: # Solr command to start the container to make sure the security.json is created - - solr-foreground -c - healthcheck: - test: [ "CMD", "/usr/bin/curl", "-s", "-f", "http://full-text-search-external_zoo:8983/solr/#/admin/ping" ] - interval: 5s - timeout: 10s - start_period: 30s # The time to wait before starting the healthcheck process after the container is started, - #it is useful to wait for Solr authentication to be ready - retries: 5 - zoo1: - image: zookeeper:3.8.0 - container_name: zoo1 - restart: always # Use this option to maintain the high availability of zookeeper service, the service will be restarted if it fails - hostname: zoo1 - ports: - - 2181:2181 - - 7001:7000 # Add this port for the Prometheus metrics provider. In the ZOO_CFG_EXTRA environment variable is - # specified that Zookeeper will expose its metrics on port 7000 (metricsProvider.httpPort=7000) - # for monitoring purposes. - environment: - ZOO_MY_ID: 1 # To make sure zookeeper server is listen in the right ip:port - # add unique Id for each Zookeeper server at - # /myid file. zookeeper1:/myid =1 , zookeeper2:/myid = 2. - # The dataDir is defined on the zoo.cfg. In our case is /data - ZOO_SERVERS: server.1=zoo1:2888:3888;2181 # The server.1 is the unique id of the server, - # the zoo1 is the hostname of the server, - # the 2888 is the port for the quorum communication, - # the 3888 is the port for the leader election, - # the 2181 is the client port. 
- # If you want to add more zookeeper nodes you add them here: - # For example: server.1=zoo1:2888:3888;2181 server.2=zoo2:2888:3888;2181 server.3=zoo3:2888:3888;2181 - ZOO_4LW_COMMANDS_WHITELIST: mntr, conf, ruok # The 4lw commands are used to monitor the Zookeeper server. - # It is a good practice to add this command to allow only the necessary commands to be executed. - # It is also an alternative to protect the security of Zookeeper server. - # These commands perform essential monitoring and health checks on the Zookeeper server. - # mntr: to monitor the Zookeeper server - # conf: to get the configuration of the Zookeeper server - # ruok: to check if the Zookeeper server is ok - # ZOO_CFG_EXTRA is used to add extra configuration to the zoo.cfg file. For example, to enable the Prometheus metrics provider - ZOO_CFG_EXTRA: "metricsProvider.className=org.apache.zookeeper.metrics.prometheus.PrometheusMetricsProvider metricsProvider.httpPort=7000 metricsProvider.exportJvmInfo=true" - ZOO_LOG_DIR: "/logs" - networks: - - solr - volumes: - - zookeeper1_log:/logs # The log directory is used to store the Zookeeper logs - - zookeeper1_data:/data # The data directory is used to store the Zookeeper data - - zookeeper1_datalog:/datalog # The datalog directory is used to store the Zookeeper transaction logs - - zookeeper1_wd:/apache-zookeeper-3.8.0-bin # The working directory is used to store the Zookeeper configuration files - healthcheck: - test: [ "CMD", "echo", "ruok", "|", "nc", "localhost", "2181", "|", "grep", "imok" ] - interval: 30s - timeout: 10s - retries: 5 - # Service that run the script collection_manager.sh to create Solr collections using the Solr API (curl command) - # As solr requires authentication, the container receives the SOLR_USER and SOLR_PASSWORD as environment variables - collection_creator: - container_name: collection_creator - build: - context: . 
- dockerfile: ./solr8.11.2_files/Dockerfile - target: external_zookeeper - entrypoint: [ "/bin/sh", "-c" ,"/var/solr/data/collection_manager.sh http://full-text-search-external_zoo:8983"] - volumes: - - solr1_data:/var/solr/data - depends_on: - full-text-search-external_zoo: - condition: service_healthy - networks: - - solr - profiles: [create_collections] - environment: - - SOLR_PASSWORD=$SOLR_PASSWORD - # Service that use the Python module solr_manager to manage Solr collections and configsets - solr_manager: - build: - context: ./solr_manager - target: runtime - dockerfile: Dockerfile - args: - UID: ${UID:-1000} - GID: ${GID:-1000} - ENV: ${ENV:-dev} - POETRY_VERSION: ${POETRY_VERSION:-1.5.1} - SOLR_PASSWORD: ${SOLR_PASSWORD:-solr} - SOLR_USER: ${SOLR_USER:-solrRocks} - ZK_HOST: ${ZK_HOST:-zoo1:2181,zoo2:2181,zoo3:2181} - env_file: - - ./solr_manager/.env - volumes: - - .:/app - stdin_open: true - depends_on: - full-text-search-external_zoo: - condition: service_healthy - tty: true - container_name: solr_manager - networks: - - solr - profiles: [ solr_collection_manager ] -networks: - solr: -volumes: - solr1_data: - - zookeeper1_data: - zookeeper1_datalog: - zookeeper1_log: - zookeeper1_wd: diff --git a/docker-compose_solr6_standalone.yml b/docker-compose_solr6_standalone.yml index 597f069..eeadfdf 100644 --- a/docker-compose_solr6_standalone.yml +++ b/docker-compose_solr6_standalone.yml @@ -1,6 +1,3 @@ ---- -version: '3' - services: solr-lss-dev: image: solr:6.6.6-alpine diff --git a/indexing_data.sh b/indexing_data.sh index 0ba7037..df14d4e 100755 --- a/indexing_data.sh +++ b/indexing_data.sh @@ -2,12 +2,14 @@ # This script is used to index the sample data into the Solr core -# Example: ./indexing_data.sh http://localhost:8983 ~/mydata data_sample.zip +# Example: ./indexing_data.sh http://localhost:8983 solr_pass ~/mydata data_sample.zip core-x -solr_url="$1" #Solr URL +solr_url="$1" + #Solr URL solr_pass="$2" sample_data_directory="$3" #Directory where the 
sample data is located (XML files) zip_file="$4" #Zip file containing the sample data +collection_name="$5" #Solr collection name if [ -d "$sample_data_directory" ] then @@ -23,7 +25,7 @@ echo $SOLR_PASS for file in "$sample_data_directory/data_sample/"*.xml do echo "Indexing $file 🌞!!!" - curl -u admin:$solr_pass "$solr_url/solr/core-x/update?commit=true" -H "Content-Type: text/xml" --data-binary @$file + curl -u admin:$solr_pass "$solr_url/solr/$collection_name/update?commit=true" -H "Content-Type: text/xml" --data-binary @$file done diff --git a/init_solr_manager.sh b/init_solr_manager.sh new file mode 100755 index 0000000..df481d1 --- /dev/null +++ b/init_solr_manager.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +if [ -f "solr_manager/.env" ]; then + echo "🌎 solr_manager/.env exists. Leaving alone" +else + echo "🌎 solr_manager/.env does not exist. Copying solr_manager/.env-example to solr_manager/.env" + cp solr_manager/env.example solr_manager/.env + + YOUR_UID=`id -u` + YOUR_GID=`id -g` + echo "🙂 Setting your UID ($YOUR_UID) and GID ($YOUR_UID) in .env" + docker run --rm -v ./solr_manager/.env:/solr_manager/.env alpine echo "$(sed s/YOUR_UID/$YOUR_UID/ solr_manager/.env)" > solr_manager/.env + docker run --rm -v ./solr_manager/.env:/solr_manager/.env alpine echo "$(sed s/YOUR_GID/$YOUR_GID/ solr_manager/.env)" > solr_manager/.env +fi + +echo "🚢 Run containers" +docker compose -f docker-compose.yml --profile solr_collection_manager up diff --git a/solr8.11.2_files/Dockerfile b/solr8.11.2_cloud/Dockerfile similarity index 61% rename from solr8.11.2_files/Dockerfile rename to solr8.11.2_cloud/Dockerfile index feeac8c..71a7353 100644 --- a/solr8.11.2_files/Dockerfile +++ b/solr8.11.2_cloud/Dockerfile @@ -13,27 +13,19 @@ ENV SOLR_OPTS="-Denable.packages=true -Dsolr.cloud.client.stallTime=30000 -Dsolr -XX:-UseLargePages" # Copy the required files to the image, lib and conf files -COPY --chown=solr:solr ./solr8.11.2_files/lib /var/solr/lib -COPY --chown=solr:solr 
./solr8.11.2_files/conf /opt/solr/core-x +COPY --chown=solr:solr ../solr8.11.2_cloud/lib /var/solr/lib +COPY --chown=solr:solr ../solr8.11.2_cloud/conf /opt/solr/core-x #======================================================= FROM common AS external_zookeeper_docker # any steps specific to building the image with external zookeeper # authentication is enabled in the solr container -COPY --chown=solr:solr ./solr8.11.2_files/conf /opt/solr/core-x +COPY --chown=solr:solr ../solr8.11.2_cloud/security.json /opt/solr/security.json -COPY --chown=solr:solr ./solr8.11.2_files/security.json /opt/solr/security.json - -COPY --chown=solr:solr --chmod=0755 ./solr8.11.2_files/solrCloud_external_zooKeeper/init_files/solr_init.sh /usr/bin/solr_init.sh - -COPY --chown=solr:solr --chmod=0755 ./solr8.11.2_files/solrCloud_external_zooKeeper/collection_manager.sh /var/solr/data/collection_manager.sh +COPY --chown=solr:solr --chmod=0755 ../solr8.11.2_cloud/init_files/solr_init.sh /usr/bin/solr_init.sh # Run this script to start solr as an entrypoint to configure the container to run as an executable ENTRYPOINT ["/usr/bin/solr_init.sh"] -#======================================================= -#FROM common AS external_zookeeper_kubernetes -# any steps specific to building the image with external zookeeper - diff --git a/solr8.11.2_files/conf/1000common.txt b/solr8.11.2_cloud/conf/1000common.txt similarity index 100% rename from solr8.11.2_files/conf/1000common.txt rename to solr8.11.2_cloud/conf/1000common.txt diff --git a/solr8.11.2_files/conf/BM25/similarity.xml b/solr8.11.2_cloud/conf/BM25/similarity.xml similarity index 100% rename from solr8.11.2_files/conf/BM25/similarity.xml rename to solr8.11.2_cloud/conf/BM25/similarity.xml diff --git a/solr8.11.2_files/conf/configoverlay.json b/solr8.11.2_cloud/conf/configoverlay.json similarity index 100% rename from solr8.11.2_files/conf/configoverlay.json rename to solr8.11.2_cloud/conf/configoverlay.json diff --git 
a/solr8.11.2_files/conf/indexing_core_specific/1/mergePolicy.xml b/solr8.11.2_cloud/conf/indexing_core_specific/1/mergePolicy.xml similarity index 100% rename from solr8.11.2_files/conf/indexing_core_specific/1/mergePolicy.xml rename to solr8.11.2_cloud/conf/indexing_core_specific/1/mergePolicy.xml diff --git a/solr8.11.2_files/conf/indexing_core_specific/10/mergePolicy.xml b/solr8.11.2_cloud/conf/indexing_core_specific/10/mergePolicy.xml similarity index 100% rename from solr8.11.2_files/conf/indexing_core_specific/10/mergePolicy.xml rename to solr8.11.2_cloud/conf/indexing_core_specific/10/mergePolicy.xml diff --git a/solr8.11.2_files/conf/indexing_core_specific/11/mergePolicy.xml b/solr8.11.2_cloud/conf/indexing_core_specific/11/mergePolicy.xml similarity index 100% rename from solr8.11.2_files/conf/indexing_core_specific/11/mergePolicy.xml rename to solr8.11.2_cloud/conf/indexing_core_specific/11/mergePolicy.xml diff --git a/solr8.11.2_files/conf/indexing_core_specific/12/mergePolicy.xml b/solr8.11.2_cloud/conf/indexing_core_specific/12/mergePolicy.xml similarity index 100% rename from solr8.11.2_files/conf/indexing_core_specific/12/mergePolicy.xml rename to solr8.11.2_cloud/conf/indexing_core_specific/12/mergePolicy.xml diff --git a/solr8.11.2_files/conf/indexing_core_specific/2/mergePolicy.xml b/solr8.11.2_cloud/conf/indexing_core_specific/2/mergePolicy.xml similarity index 100% rename from solr8.11.2_files/conf/indexing_core_specific/2/mergePolicy.xml rename to solr8.11.2_cloud/conf/indexing_core_specific/2/mergePolicy.xml diff --git a/solr8.11.2_files/conf/indexing_core_specific/3/mergePolicy.xml b/solr8.11.2_cloud/conf/indexing_core_specific/3/mergePolicy.xml similarity index 100% rename from solr8.11.2_files/conf/indexing_core_specific/3/mergePolicy.xml rename to solr8.11.2_cloud/conf/indexing_core_specific/3/mergePolicy.xml diff --git a/solr8.11.2_files/conf/indexing_core_specific/4/mergePolicy.xml 
b/solr8.11.2_cloud/conf/indexing_core_specific/4/mergePolicy.xml similarity index 100% rename from solr8.11.2_files/conf/indexing_core_specific/4/mergePolicy.xml rename to solr8.11.2_cloud/conf/indexing_core_specific/4/mergePolicy.xml diff --git a/solr8.11.2_files/conf/indexing_core_specific/5/mergePolicy.xml b/solr8.11.2_cloud/conf/indexing_core_specific/5/mergePolicy.xml similarity index 100% rename from solr8.11.2_files/conf/indexing_core_specific/5/mergePolicy.xml rename to solr8.11.2_cloud/conf/indexing_core_specific/5/mergePolicy.xml diff --git a/solr8.11.2_files/conf/indexing_core_specific/6/mergePolicy.xml b/solr8.11.2_cloud/conf/indexing_core_specific/6/mergePolicy.xml similarity index 100% rename from solr8.11.2_files/conf/indexing_core_specific/6/mergePolicy.xml rename to solr8.11.2_cloud/conf/indexing_core_specific/6/mergePolicy.xml diff --git a/solr8.11.2_files/conf/indexing_core_specific/7/mergePolicy.xml b/solr8.11.2_cloud/conf/indexing_core_specific/7/mergePolicy.xml similarity index 100% rename from solr8.11.2_files/conf/indexing_core_specific/7/mergePolicy.xml rename to solr8.11.2_cloud/conf/indexing_core_specific/7/mergePolicy.xml diff --git a/solr8.11.2_files/conf/indexing_core_specific/8/mergePolicy.xml b/solr8.11.2_cloud/conf/indexing_core_specific/8/mergePolicy.xml similarity index 100% rename from solr8.11.2_files/conf/indexing_core_specific/8/mergePolicy.xml rename to solr8.11.2_cloud/conf/indexing_core_specific/8/mergePolicy.xml diff --git a/solr8.11.2_files/conf/indexing_core_specific/9/mergePolicy.xml b/solr8.11.2_cloud/conf/indexing_core_specific/9/mergePolicy.xml similarity index 100% rename from solr8.11.2_files/conf/indexing_core_specific/9/mergePolicy.xml rename to solr8.11.2_cloud/conf/indexing_core_specific/9/mergePolicy.xml diff --git a/solr8.11.2_files/conf/old/1000common_deprecated_2020Feb.txt b/solr8.11.2_cloud/conf/old/1000common_deprecated_2020Feb.txt similarity index 100% rename from 
solr8.11.2_files/conf/old/1000common_deprecated_2020Feb.txt rename to solr8.11.2_cloud/conf/old/1000common_deprecated_2020Feb.txt diff --git a/solr8.11.2_files/conf/schema.xml b/solr8.11.2_cloud/conf/schema.xml similarity index 100% rename from solr8.11.2_files/conf/schema.xml rename to solr8.11.2_cloud/conf/schema.xml diff --git a/solr8.11.2_files/conf/serve/mergePolicy.xml b/solr8.11.2_cloud/conf/serve/mergePolicy.xml similarity index 100% rename from solr8.11.2_files/conf/serve/mergePolicy.xml rename to solr8.11.2_cloud/conf/serve/mergePolicy.xml diff --git a/solr8.11.2_files/conf/solrconfig.xml b/solr8.11.2_cloud/conf/solrconfig.xml similarity index 100% rename from solr8.11.2_files/conf/solrconfig.xml rename to solr8.11.2_cloud/conf/solrconfig.xml diff --git a/solr8.11.2_files/conf/tfidf/similarity.xml b/solr8.11.2_cloud/conf/tfidf/similarity.xml similarity index 100% rename from solr8.11.2_files/conf/tfidf/similarity.xml rename to solr8.11.2_cloud/conf/tfidf/similarity.xml diff --git a/solr8.11.2_files/solrCloud_external_zooKeeper/init_files/solr_init.sh b/solr8.11.2_cloud/init_files/solr_init.sh similarity index 100% rename from solr8.11.2_files/solrCloud_external_zooKeeper/init_files/solr_init.sh rename to solr8.11.2_cloud/init_files/solr_init.sh diff --git a/solr8.11.2_files/lib/HTPostingsFormat.jar b/solr8.11.2_cloud/lib/HTPostingsFormat.jar similarity index 100% rename from solr8.11.2_files/lib/HTPostingsFormat.jar rename to solr8.11.2_cloud/lib/HTPostingsFormat.jar diff --git a/solr8.11.2_files/lib/icu4j-62.1.jar b/solr8.11.2_cloud/lib/icu4j-62.1.jar similarity index 100% rename from solr8.11.2_files/lib/icu4j-62.1.jar rename to solr8.11.2_cloud/lib/icu4j-62.1.jar diff --git a/solr8.11.2_files/lib/lucene-analyzers-icu-8.2.0.jar b/solr8.11.2_cloud/lib/lucene-analyzers-icu-8.2.0.jar similarity index 100% rename from solr8.11.2_files/lib/lucene-analyzers-icu-8.2.0.jar rename to solr8.11.2_cloud/lib/lucene-analyzers-icu-8.2.0.jar diff --git 
a/solr8.11.2_files/lib/lucene-umich-solr-filters-3.0-solr-8.8.2.jar b/solr8.11.2_cloud/lib/lucene-umich-solr-filters-3.0-solr-8.8.2.jar similarity index 100% rename from solr8.11.2_files/lib/lucene-umich-solr-filters-3.0-solr-8.8.2.jar rename to solr8.11.2_cloud/lib/lucene-umich-solr-filters-3.0-solr-8.8.2.jar diff --git a/solr8.11.2_files/security.json b/solr8.11.2_cloud/security.json similarity index 100% rename from solr8.11.2_files/security.json rename to solr8.11.2_cloud/security.json diff --git a/solr8.11.2_files/solrCloud_embedded_zooKeeper/collection_manager.sh b/solr8.11.2_files/solrCloud_embedded_zooKeeper/collection_manager.sh deleted file mode 100755 index e75eefc..0000000 --- a/solr8.11.2_files/solrCloud_embedded_zooKeeper/collection_manager.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash - -host="http://127.0.0.1:8983" - -echo "Indexing documents" -#curl "$host/solr/core-x/update?commit=true" --data-binary @solr_json_documents/core-data.json -H 'Content-type:application/json' - -for file in var/solr/data/data_sample/*.xml - do - echo "Indexing $file 🌞!!!" - curl "http://$SOLR_HOST/solr/core-x/update?commit=true" -H "Content-Type: text/xml" --data-binary @$file - done - diff --git a/solr8.11.2_files/solrCloud_embedded_zooKeeper/init_files/solr_init.sh b/solr8.11.2_files/solrCloud_embedded_zooKeeper/init_files/solr_init.sh deleted file mode 100644 index 9671e2f..0000000 --- a/solr8.11.2_files/solrCloud_embedded_zooKeeper/init_files/solr_init.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash - -echo "Starting up Solr and Zookeeper 🌞 🐘🦓🦒!!!" - -# an embedded ZooKeeper instance is started on the port 9983. 
It stars in Solr port+1000 -solr start -c -h 127.0.0.1 -p 8983 -m 2g - -echo "🐘🦓🦒 Checking Zookeeper on $ZK_HOST" -/opt/docker-solr/scripts/wait-for-zookeeper.sh - -# enables solr to start with basic auth turned on -solr zk cp /opt/solr/security.json zk:security.json - -echo "🌞 Checking Solr" -/opt/docker-solr/scripts/wait-for-solr.sh - -# runs docker entry-point.sh and whatever is in command -exec /opt/docker-solr/scripts/docker-entrypoint.sh "$@" - -# echo "🌞 Creating collection" -# uploads the configuration in the core-x directory and creates a collection -# solr create_collection -c core-x -d core-x/ -shards 3 -replicationFactor 3 - -# solr stop -p 8983 - -# cat /var/solr/logs/solr.log \ No newline at end of file diff --git a/solr8.11.2_files/solrCloud_external_zooKeeper/collection_manager.sh b/solr8.11.2_files/solrCloud_external_zooKeeper/collection_manager.sh deleted file mode 100755 index a80ecfd..0000000 --- a/solr8.11.2_files/solrCloud_external_zooKeeper/collection_manager.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -SOLR_HOST=$1 - -echo "Creating collection" -curl -u admin:"$SOLR_PASSWORD" "$SOLR_HOST/solr/admin/collections?action=CREATE&name=core-x&instanceDir=/var/solr/data/core-x&numShards=1&collection.configName=core-x&dataDir=/var/solr/data/core-x" - diff --git a/solr8.11.2_files/solr_standalone_mode/core.properties b/solr8.11.2_files/solr_standalone_mode/core.properties deleted file mode 100644 index b18def0..0000000 --- a/solr8.11.2_files/solr_standalone_mode/core.properties +++ /dev/null @@ -1,5 +0,0 @@ -#Written by CorePropertiesLocator -#Tue Jun 28 15:15:14 UTC 2022 -dataDir=data -name=core-x -config=solrconfig.xml \ No newline at end of file diff --git a/solr_manager/README.md b/solr_manager/README.md index 8f82d38..5fede3a 100644 --- a/solr_manager/README.md +++ b/solr_manager/README.md @@ -77,7 +77,7 @@ The project is divided into the following phases: ``` docker-compose up -d solr_manager ``` -4. 
[Recommendation] Run the script `solr_manager/init_solr_manager.sh` to start the application and set up the environment variables. +4. [Recommendation] Run the script `init_solr_manager.sh` to start the application and set up the environment variables. ``` ./init_solr_manager.sh ``` @@ -128,7 +128,6 @@ solr_manager/ ├── Dockerfile ├── README.md ├── docker-compose.yml -├── init_solr_manager.sh ├── pyproject.toml ├── poetry.lock ├── .env @@ -138,6 +137,7 @@ solr_manager/ │ ├── solr_collection_manager_test.py └── solr_files/ └── test_configset.zip +init_solr_manager.sh ``` As this project requires a Solr server to run, the `docker-compose.yml` file starts a 3 Solr containers @@ -175,11 +175,11 @@ The application provides the following functionalities: ### Create a Collection ``` -docker exec -it solr_manager python solr_collection_manager.py --solr_url --action create_collection --name +docker exec -it solr_manager python solr_collection_manager.py --solr_url --action create_collection --name --replication_factor ``` Example: -```docker exec -it solr_manager python solr_collection_manager.py --solr_url http://solr1:8983 --action create_collection --name new_collection``` +```docker exec -it solr_manager python solr_collection_manager.py --solr_url http://solr1:8983 --action create_collection --name new_collection --replication_factor 1``` The script that creates a collection also accepts the following optional arguments: - `--num_shards`: Number of shards (default: 1) @@ -187,11 +187,12 @@ The script that creates a collection also accepts the following optional argumen - `--configset_name`: Configset name (default: _default) - `--max_shards_per_node`: Maximum number of shards per node (default: 1) -In the command below you will see how to use the different parameters to create collections with different configurations: +In the command below, +you will see how to use the different parameters to create collections with different configurations: ``` docker exec -it 
solr_manager python solr_collection_manager.py --solr_url http://solr1:8983 --action create_collection \ ---name new_collection --num_shards 2 --maxShardsPerNode 3 --replication_factor 2 --configset_name conf +--name new_collection --num_shards 1 --max_shards_per_node 1 --replication_factor 1 --configset_name conf ``` The collection with the name `new_collection` will be created using the already created configset `conf` and the @@ -212,7 +213,7 @@ The script will return an error message: * if the Solr server is not running Example: -```docker exec -it solr_manager python solr_collection_manager.py --solr_url http://solr1:8983 --action delete_collection --name new_coll``` +```docker exec -it solr_manager python solr_collection_manager.py --solr_url http://solr1:8983 --action delete_collection --name new_collection``` ### List Collections ``` @@ -228,7 +229,7 @@ docker exec -it solr_manager python solr_collection_manager.py --solr_url https://solr.apache.org/guide/solr/latest/deployment-guide/docker-faq.html - networks: - - solr - depends_on: - zoo1: - condition: service_healthy - zoo2: - condition: service_healthy - zoo3: - condition: service_healthy - healthcheck: - test: [ "CMD", "/usr/bin/curl", "-s", "-f", "http://solr1:8983/solr/#/admin/ping" ] - interval: 30s - timeout: 10s - retries: 5 - command: solr-foreground -c # Solr command to start the container to make sure the security.json is created - solr2: - image: ghcr.io/hathitrust/full-text-search-external_zoo:shards-8.11 - #build: - # context: .. 
- # dockerfile: solr8.11.2_files/Dockerfile - # target: external_zookeeper - container_name: solr2 - ports: - - "8982:8983" - environment: - - ZK_HOST=zoo1:2181,zoo2:2181,zoo3:2181 - networks: - - solr - depends_on: - zoo1: - condition: service_healthy - zoo2: - condition: service_healthy - zoo3: - condition: service_healthy - healthcheck: - test: [ "CMD", "/usr/bin/curl", "-s", "-f", "http://solr2:8983/solr/#/admin/ping" ] - interval: 30s - timeout: 10s - retries: 5 - command: solr-foreground -c - solr3: - image: ghcr.io/hathitrust/full-text-search-external_zoo:shards-8.11 - #build: - # context: .. - # dockerfile: solr8.11.2_files/Dockerfile - # target: external_zookeeper - container_name: solr3 - ports: - - "8983:8983" - environment: - - ZK_HOST=zoo1:2181,zoo2:2181,zoo3:2181 - networks: - - solr - depends_on: - zoo1: - condition: service_healthy - zoo2: - condition: service_healthy - zoo3: - condition: service_healthy - healthcheck: - test: [ "CMD", "/usr/bin/curl", "-s", "-f", "http://solr3:8983/solr/#/admin/ping" ] - interval: 30s - timeout: 10s - retries: 5 - command: solr-foreground -c - zoo1: - image: zookeeper:3.8.0 - container_name: zoo1 - restart: always - hostname: zoo1 - ports: - - 2181:2181 - - 7001:7000 - environment: - ZOO_MY_ID: 1 - ZOO_SERVERS: server.1=zoo1:2888:3888;2181 server.2=zoo2:2888:3888;2181 server.3=zoo3:2888:3888;2181 - ZOO_4LW_COMMANDS_WHITELIST: mntr, conf, ruok - ZOO_CFG_EXTRA: "metricsProvider.className=org.apache.zookeeper.metrics.prometheus.PrometheusMetricsProvider metricsProvider.httpPort=7000 metricsProvider.exportJvmInfo=true" - networks: - - solr - volumes: - - zoo1_data:/data - healthcheck: - test: [ "CMD", "echo", "ruok", "|", "nc", "localhost", "2181", "|", "grep", "imok" ] - interval: 30s - timeout: 10s - retries: 5 - zoo2: - image: zookeeper:3.8.0 - container_name: zoo2 - restart: always - hostname: zoo2 - ports: - - 2182:2181 - - 7002:7000 - environment: - ZOO_MY_ID: 2 - ZOO_SERVERS: server.1=zoo1:2888:3888;2181 
server.2=zoo2:2888:3888;2181 server.3=zoo3:2888:3888;2181 - ZOO_4LW_COMMANDS_WHITELIST: mntr, conf, ruok - ZOO_CFG_EXTRA: "metricsProvider.className=org.apache.zookeeper.metrics.prometheus.PrometheusMetricsProvider metricsProvider.httpPort=7000 metricsProvider.exportJvmInfo=true" - networks: - - solr - volumes: - - zoo2_data:/data - healthcheck: - test: [ "CMD", "echo", "ruok", "|", "nc", "localhost", "2181", "|", "grep", "imok" ] - interval: 30s - timeout: 10s - retries: 5 - zoo3: - image: zookeeper:3.8.0 - container_name: zoo3 - restart: always - hostname: zoo3 - ports: - - 2183:2181 - - 7003:7000 - environment: - ZOO_MY_ID: 3 - ZOO_SERVERS: server.1=zoo1:2888:3888;2181 server.2=zoo2:2888:3888;2181 server.3=zoo3:2888:3888;2181 - ZOO_4LW_COMMANDS_WHITELIST: mntr, conf, ruok - ZOO_CFG_EXTRA: "metricsProvider.className=org.apache.zookeeper.metrics.prometheus.PrometheusMetricsProvider metricsProvider.httpPort=7000 metricsProvider.exportJvmInfo=true" - networks: - - solr - volumes: - - zoo3_data:/data - healthcheck: - test: [ "CMD", "echo", "ruok", "|", "nc", "localhost", "2181", "|", "grep", "imok" ] - interval: 30s - timeout: 10s - retries: 5 -networks: - solr: -volumes: - zoo1_data: null - zoo2_data: null - zoo3_data: null \ No newline at end of file diff --git a/solr_manager/init_solr_manager.sh b/solr_manager/init_solr_manager.sh deleted file mode 100755 index e16583e..0000000 --- a/solr_manager/init_solr_manager.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env bash - -if [ -f ".env" ]; then - echo "🌎 .env exists. Leaving alone" -else - echo "🌎 .env does not exist. 
Copying .env-example to .env" - cp env.example .env - - YOUR_UID=`id -u` - YOUR_GID=`id -g` - echo "🙂 Setting your UID ($YOUR_UID) and GID ($YOUR_UID) in .env" - docker run --rm -v ./.env:/.env alpine echo "$(sed s/YOUR_UID/$YOUR_UID/ .env)" > .env - docker run --rm -v ./.env:/.env alpine echo "$(sed s/YOUR_GID/$YOUR_GID/ .env)" > .env -fi - -echo "🚢 Run containers" -docker-compose -f docker-compose.yml up \ No newline at end of file diff --git a/solr_manager/solr_collection_manager.py b/solr_manager/solr_collection_manager.py index 99ab5e5..6deab7b 100644 --- a/solr_manager/solr_collection_manager.py +++ b/solr_manager/solr_collection_manager.py @@ -178,7 +178,7 @@ def main(): parser.add_argument('--name', type=str, help='Name of the collection', required=False) parser.add_argument('--num_shards', type=int, help='Number of shards', required=False, default=1) parser.add_argument('--replication_factor', type=int, help='Replication factor', required=False, - default=3) + default=1) # Defining maxShardsPerNode sets a limit on the number of replicas the CREATE action will spread to each node. 
parser.add_argument('--max_shards_per_node', type=int, help='Number of shards', required=False, default=1) diff --git a/solr_manager/solr_collection_manager_test.py b/solr_manager/solr_collection_manager_test.py index 8d0dab7..dc533d4 100644 --- a/solr_manager/solr_collection_manager_test.py +++ b/solr_manager/solr_collection_manager_test.py @@ -1,9 +1,11 @@ +from os.path import dirname from unittest.mock import Mock from unittest.mock import patch import unittest import pytest import os +import sys import requests @@ -87,7 +89,7 @@ def test_upload_configset(mock_put, solr_manager): } configset_name = "test_configset" mock_put.return_value.json.return_value = mock_response - path_config_set = f"{os.getcwd()}/solr_files/conf.zip" + path_config_set = f"{os.getcwd()}/tests/conf.zip" response = solr_manager.upload_configset(configset_name, path_config_set) assert response == mock_response diff --git a/solr_manager/solr_files/conf.zip b/solr_manager/tests/conf.zip similarity index 99% rename from solr_manager/solr_files/conf.zip rename to solr_manager/tests/conf.zip index bf705595b844b203e49f00758a86052157ebc99d..624c6b226e8f3465fecafd967402a1d69b0f5c2c 100644 GIT binary patch delta 27 gcmZ4WoN?82#tqMvnOQQVHosE-?*?YP$=b&O0IZb@#Q*>R delta 27 gcmZ4WoN?82#tqMvnPXZQH@{N;?*?YP$=b&O0JWqHdjJ3c