Uncategorized

Solr Search Engine for Desktop

It can take a good amount of time to find a file on a Windows machine. Here is a simple system that indexes all the desktop files in Solr, a well-known open source search engine, so that they can be searched in seconds.

Solr is an open source search engine from Apache: https://lucene.apache.org/solr/.

Step 1: Download Solr and run it in cloud mode. Here is the guide to get started with Solr: https://lucene.apache.org/solr/guide/7_7/solr-tutorial.html#solr-tutorial

Once you have downloaded and extracted Solr into a folder, start it with the following command:

>./bin/solr start -c -p 8983 -s ../example/cloud/node1/solr

This will start the first node, which may take approximately 30 seconds. It will also start an embedded ZooKeeper instance (on port 9983) to manage the nodes. Once the first node has started, here is the command to start the second node:

>./bin/solr start -c -p 7574 -s ../example/cloud/node2/solr -z localhost:9983

This will start Solr as a two-node cluster, which can be accessed at http://localhost:8983/solr

Step 2: Create a collection for indexing files

>solr create -c latest -s 2 -rf 2

Step 3: Write a program to extract the file names and index them in the Solr search engine.

package com.kvn.web.solrClients;

import java.io.File;
import java.io.IOException;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.function.Predicate;
import java.util.stream.Collectors;

import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrClient;

/**
 * Recursively walks a fixed set of base folders, selects files whose names end
 * with one of the configured extensions, and indexes each file's name, absolute
 * path and last-modified time into the Solr collection at {@link #baseUrl}.
 *
 * <p>One batch is sent (and committed) per directory that contains at least one
 * matching file. The Solr client is created lazily on first use and closed by
 * {@link #main(String[])} when the walk finishes.
 */
public class ListFilesOfType {

	/** Root folders whose contents (including sub-folders) are indexed. */
	static String baseFoldersToIndex[] = { "C:/docs", "C:/Java Majestic", "C:/JavaMajestic",
			"C:/Users/prabhukvn/Documents", "c:/Users/prabhukvn/Downloads", "C:/POCS", "C:/softwares" };

	/** Accepts regular files only (directories are handled by the recursion). */
	Predicate<File> isFile = File::isFile;
	/** URL of the Solr collection that receives the documents. */
	String baseUrl = "http://localhost:8983/solr/latest";
	/** Lazily created client; {@code null} until {@link #connect()} is first called. */
	HttpSolrClient client = null;

	/** File-name suffixes (including the dot) that qualify a file for indexing. */
	static List<String> fileTypes = new ArrayList<>();
	/** Mutable copy of {@link #baseFoldersToIndex} that {@link #main(String[])} iterates. */
	static List<String> filePaths = new ArrayList<>();

	static {
		fileTypes.add(".pdf");
		fileTypes.add(".doc");
		fileTypes.add(".docx");
	}

	static {
		filePaths.addAll(Arrays.asList(baseFoldersToIndex));
	}

	/**
	 * Indexes every configured base folder and prints the elapsed time in
	 * milliseconds.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		ListFilesOfType obj = new ListFilesOfType();

		long startTime = System.currentTimeMillis();
		try {
			filePaths.stream().map(File::new).forEach(obj::processFile);
			System.out.println("Total Time:" + (System.currentTimeMillis() - startTime));
		} finally {
			// Release the HTTP resources held by the Solr client, even if the walk failed.
			obj.disconnect();
		}
	}

	/**
	 * Indexes the matching files directly inside {@code file} and then recurses
	 * into each of its sub-directories.
	 *
	 * @param file directory to process; if it is not a directory or cannot be
	 *             listed, {@link File#listFiles()} returns {@code null} and the
	 *             call does nothing
	 */
	public void processFile(File file) {
		File[] children = file.listFiles();
		if (children == null) {
			return;
		}

		// parallel() is kept from the original; toList() still yields encounter order.
		List<FileDoc> fileDocs = Arrays.stream(children).parallel().filter(isFile.and(this::filterFiles))
				.map(this::createFileDoc).collect(Collectors.toList());
		if (!fileDocs.isEmpty()) {
			this.sendToSolar(fileDocs);
		}

		Arrays.stream(children).filter(File::isDirectory).forEach(this::processFile);
	}

	/**
	 * Prints the name of every document, then adds the batch to Solr and commits.
	 * Failures are reported on standard error and do not abort the walk.
	 *
	 * @param fileDocs non-empty batch of documents to index
	 */
	private void sendToSolar(List<FileDoc> fileDocs) {
		// Sequential on purpose: names are printed in the same order as the batch.
		fileDocs.forEach(doc -> System.out.println(doc.getName()));
		try {
			HttpSolrClient solrClient = this.connect();
			solrClient.addBeans(fileDocs);
			solrClient.commit();
		} catch (SolrServerException | IOException e) {
			System.err.println("Failed to index " + fileDocs.size() + " file(s) into " + baseUrl);
			e.printStackTrace();
		}
	}

	/**
	 * Builds the Solr document for one file.
	 *
	 * @param file file to describe
	 * @return document carrying the file's name, absolute path and last-modified time
	 */
	FileDoc createFileDoc(File file) {
		return new FileDoc(file.getName(), file.getAbsolutePath(), new Timestamp(file.lastModified()));
	}

	/**
	 * Tells whether a file's name ends with one of the configured
	 * {@link #fileTypes}. The comparison ignores case, so {@code REPORT.PDF}
	 * matches {@code .pdf}.
	 *
	 * @param f file to test
	 * @return {@code true} if the name ends with a configured extension
	 */
	public boolean filterFiles(File f) {
		String name = f.getName();
		// regionMatches returns false (rather than throwing) when the name is
		// shorter than the extension and the offset goes negative.
		return fileTypes.stream()
				.anyMatch(ext -> name.regionMatches(true, name.length() - ext.length(), ext, 0, ext.length()));
	}

	/**
	 * Returns the shared Solr client, creating it on first use.
	 *
	 * @return client bound to {@link #baseUrl}
	 */
	private HttpSolrClient connect() {
		if (client == null) {
			client = new HttpSolrClient.Builder(baseUrl).withConnectionTimeout(10000).withSocketTimeout(60000).build();
		}
		return client;
	}

	/**
	 * Closes the Solr client if one was created; a later {@link #connect()} call
	 * would build a fresh one.
	 */
	private void disconnect() {
		if (client == null) {
			return;
		}
		try {
			client.close();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			client = null;
		}
	}

}


And the Maven dependency:
		<dependency>
			<groupId>org.apache.solr</groupId>
			<artifactId>solr-solrj</artifactId>
			<version>7.7.0</version>
		</dependency>

Then use http://localhost:8983/solr/#/latest/query to search the data ("latest" is the collection created in Step 2).

Or http://localhost:8983/solr to open the dashboard

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Google photo

You are commenting using your Google account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s