You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

663 lines
29 KiB

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<!-- Begin Jekyll SEO tag v2.5.0 -->
<title>How to build a Profile Search Index | Blockstack</title>
<meta name="generator" content="Jekyll v3.8.3" />
<meta property="og:title" content="How to build a Profile Search Index" />
<meta name="author" content="Blockstack" />
<meta property="og:locale" content="en_US" />
<meta name="description" content="How to build a Profile Search Index" />
<meta property="og:description" content="How to build a Profile Search Index" />
<link rel="canonical" href="https://zbabystack.netlify.com/core/naming/search.html" />
<meta property="og:url" content="https://zbabystack.netlify.com/core/naming/search.html" />
<meta property="og:site_name" content="Blockstack" />
<meta property="og:type" content="article" />
<meta property="article:published_time" content="2018-09-10T15:46:41-07:00" />
<script type="application/ld+json">
{"description":"How to build a Profile Search Index","author":{"@type":"Person","name":"Blockstack"},"@type":"BlogPosting","url":"https://zbabystack.netlify.com/core/naming/search.html","headline":"How to build a Profile Search Index","dateModified":"2018-09-10T15:46:41-07:00","datePublished":"2018-09-10T15:46:41-07:00","mainEntityOfPage":{"@type":"WebPage","@id":"https://zbabystack.netlify.com/core/naming/search.html"},"@context":"http://schema.org"}</script>
<!-- End Jekyll SEO tag -->
<!-- <meta property="og:image" content="https://zbabystack.netlify.com/assets/posts/logo.png"/> -->
<meta property="og:image" content="/assets/posts/logo.png"/>
<!-- Site-wide assets: main stylesheet, favicon and the RSS feed for the site. -->
<link rel="stylesheet" href="/assets/css/main.css">
<link rel="shortcut icon" type="image/png" href="/assets/img/touch-icon.png" >
<link rel="alternate" type="application/rss+xml" title="Blockstack" href="/feed.xml">
<!-- Loaded as a blocking script (no defer/async): an inline script after the article calls UIkit.scroll() while the document is still being parsed. NOTE(review): presumably this bundle is what defines UIkit; confirm before deferring it. -->
<script src="/assets/js/main.js"></script>
</head>
<body>
<!-- Page header: sticky navbar with logo, external links, site search and the off-canvas menu toggle. -->
<header class="uk-background-secondary">
<!-- Sticky state (uk-sticky classes, inline position and width) is left for UIkit to apply at runtime; a hard-coded pixel width here would only fit one viewport size. -->
<div data-uk-sticky="sel-target: .uk-navbar-container; cls-active: uk-navbar-sticky">
<nav class="uk-navbar-container">
<div class="uk-container">
<div data-uk-navbar>
<div class="uk-navbar-left">
<!-- <a class="uk-navbar-item uk-logo" href="/"><img src="https://zbabystack.netlify.com/assets/posts/logo.png" alt="Docs"></a> -->
<a class="uk-navbar-item uk-logo" href="/"><img src="/assets/posts/logo.png" alt="Docs"></a>
</div>
<div class="uk-navbar-right">
<!-- External links open in a new tab; rel="noopener" keeps the opened page from reaching window.opener. -->
<ul class="uk-navbar-nav uk-visible@m">
<li><a href="https://blockstack.org" target="_blank" rel="noopener">Blockstack.org</a></li>
<li><a href="https://forum.blockstack.org/" target="_blank" rel="noopener">Forums</a></li>
<li><a href="https://github.com/blockstack" target="_blank" rel="noopener">GitHub</a></li>
</ul>
<div>
<!-- Icon-only controls carry aria-label so assistive technology can announce them. -->
<a class="uk-navbar-toggle" uk-search-icon href="#" aria-label="Open search"></a>
<div class="uk-drop uk-background-default uk-border-rounded" uk-drop="mode: click; pos: left-center; offset: 0">
<form class="uk-search uk-search-navbar uk-width-1-1" onsubmit="return false;">
<input id="searchBox" class="uk-search-input" type="search" placeholder="Search..." aria-label="Search the documentation" autofocus>
</form>
<ul id="searchBox-results" class="uk-position-absolute uk-width-1-1 uk-list"></ul>
</div>
</div>
<a class="uk-navbar-toggle uk-hidden@m" href="#offcanvas" data-uk-navbar-toggle-icon data-uk-toggle aria-label="Open menu"></a>
</div>
</div>
</div>
</nav>
</div>
</header>
<div class="uk-section">
<div class="uk-container">
<div class="uk-grid-large" data-uk-grid>
<div class="sidebar-fixed-width uk-visible@m">
<div class="sidebar-docs uk-position-fixed">
<h5>Overview</h5>
<ul class="uk-nav uk-nav-default doc-nav">
<!-- -->
<li class=""><a href="/core/naming/introduction.html">Blockstack Naming Service (BNS)</a></li>
<!-- -->
<li class=""><a href="/core/naming/architecture.html">Understand the Architecture</a></li>
<!-- -->
<li class=""><a href="/core/naming/namespaces.html">Understand Namespaces</a></li>
<!-- -->
<li class=""><a href="/core/naming/comparison.html">Naming system feature comparison</a></li>
</ul>
<!-- Sidebar group: tutorials. The entry for the current page is marked with uk-active (styling) and aria-current (assistive technology). -->
<h5>Tutorials &amp; Cookbooks</h5>
<ul class="uk-nav uk-nav-default doc-nav">
<!-- -->
<li class=""><a href="/core/naming/tutorial_creation.html">Create and Launch a Namespace</a></li>
<!-- -->
<li class=""><a href="/core/naming/tutorial_subdomains.html">Subdomain Design and Implementation</a></li>
<!-- -->
<li class="uk-active"><a href="/core/naming/search.html" aria-current="page">How to build a Profile Search Index</a></li>
<!-- -->
<li class=""><a href="/core/naming/openbazaar.html">How to link your OpenBazaar GUID to your Blockstack ID</a></li>
</ul>
<h5>How to use BNS</h5>
<ul class="uk-nav uk-nav-default doc-nav">
<!-- -->
<li class=""><a href="/core/naming/pickname.html">Choose a name</a></li>
<!-- -->
<li class=""><a href="/core/naming/creationhowto.html">Creating a Namespace</a></li>
<!-- -->
<li class=""><a href="/core/naming/resolving.html">Resolve a name</a></li>
<!-- -->
<li class=""><a href="/core/naming/register.html">Register a name</a></li>
<!-- -->
<li class=""><a href="/core/naming/manage.html">Manage BNS Names</a></li>
<!-- -->
<li class=""><a href="/core/naming/subdomains.html">BNS Subdomains</a></li>
</ul>
<h5>Forks and Dids</h5>
<ul class="uk-nav uk-nav-default doc-nav">
<!-- -->
<li class=""><a href="/core/naming/forks.html">BNS Forks</a></li>
<!-- -->
<li class=""><a href="/core/naming/did.html">Decentralized Identifiers (DIDs)</a></li>
</ul>
<h5>Atlas</h5>
<ul class="uk-nav uk-nav-default doc-nav">
<!-- -->
<li class=""><a href="/core/atlas/overview.html">Overview of the Atlas network</a></li>
<!-- -->
<li class=""><a href="/core/atlas/howitworks.html">How Atlas Works</a></li>
<!-- -->
<li class=""><a href="/core/atlas/howtouse.html">How to Use the Atlas Network</a></li>
</ul>
<h5>Reference</h5>
<ul class="uk-nav uk-nav-default doc-nav">
<!-- -->
<li class=""><a href="/common/javascript_ref.html">Blockstack Javascript Reference</a></li>
<!-- -->
<li class=""><a href="/common/core_ref.html">Blockstack CORE API</a></li>
<!-- -->
<li class=""><a href="/core/faq_technical.html">Blockstack Technical FAQ</a></li>
</ul>
</div>
</div>
<div class="uk-width-1-1 uk-width-expand@m">
<article class="uk-article">
<h1 class="uk-article-title">How to build a Profile Search Index</h1>
<!-- Article meta line: link to edit the page source, followed by the publication date. -->
<div class="uk-article-meta uk-margin-top uk-margin-medium-bottom">
<!-- <img class="avatar avatar-small" alt="Blockstack" width="32" height="32" data-proofer-ignore="true" src="https://avatars2.githubusercontent.com/Blockstack?v=3&s=32" srcset="https://avatars2.githubusercontent.com/Blockstack?v=3&s=32 1x, https://avatars2.githubusercontent.com/Blockstack?v=3&s=64 2x, https://avatars2.githubusercontent.com/Blockstack?v=3&s=96 3x, https://avatars2.githubusercontent.com/Blockstack?v=3&s=128 4x" /> -->
<!-- Written by <span itemprop="author" itemscope itemtype="http://schema.org/Person"><span itemprop="name">Blockstack</span></span><br> -->
<time datetime="2018-09-10T15:46:41-07:00" itemprop="datePublished">
<!-- The target attribute previously had a stray leading quote, so it was parsed as an unknown attribute and the link opened in the same tab. This is a navigating link, so it carries no button role. -->
<a target="_blank" rel="noopener" href="https://github.com/blockstack/blockstack-core/blob/master/docs/search.md" class="btn btn-default githubEditButton">
<span data-uk-icon="icon: pencil; ratio: 1.2"></span> Edit this page on Github</a>
<span style="font-family:Wingdings">&#119;</span> Sep 10, 2018
</time>
</div>
<div class="article-content">
<p>The search subsystem for Blockstack Core creates an index for data associated
with registered names in namespaces and makes that data searchable.</p>
<p>The search subsystem is currently meant to index the .id namespace but can
be easily expanded to include other namespaces.</p>
<p>Currently there are two types of indexes to handle search queries:</p>
<ul>
<li>Substring search on usernames, full names, twitter_handle (powered by MongoDB)</li>
<li>Raw Lucene index which handles searching extended data e.g., bio.</li>
</ul>
<p>Search currently returns up to a maximum of 20 results (possibly fewer, depending on the query),
with data that follows the structure of <a href="https://github.com/blockstack/blockstack">blockstack IDs</a>.</p>
<p>In early 2017, the search subsystem was ported over to the new API system, where support for search is provided by the endpoint:</p>
<div class="highlighter-rouge"><pre class="highlight"><code>http://localhost:5000/search?query=&lt;SEARCH_PATTERN&gt;
</code></pre>
</div>
<p>This document describes how to set up the search subsystem to respond at that endpoint.</p>
<h1 id="installation">Installation</h1>
<ul>
<li><strong>Step 1:</strong> First, make sure you have <a href="http://docs.python-guide.org/en/latest/dev/virtualenvs/">virtualenv installed</a>.
Then, set up the API and search subsystem:
<div class="highlighter-rouge"><pre class="highlight"><code>$ sudo apt-get install -y mongodb memcached python-dev libmemcached-dev zlib1g-dev nginx
$ sudo pip install uwsgi
$ git clone https://github.com/blockstack/blockstack-core.git --branch api
$ cd blockstack-core/
$ sudo pip install .
$ sudo pip install -r api/requirements.txt
$ sudo mkdir /var/blockstack-search &amp;&amp; sudo chown $USER:$USER /var/blockstack-search
</code></pre>
</div>
</li>
<li>
<p><strong>Step 2:</strong> Make sure you have Blockstack Core running locally (see <a href="https://github.com/blockstack/blockstack-core/blob/master/README.md#quick-start">instructions</a>). We highly
recommend using a local node because the search subsystem issues thousands of calls to
Blockstack Core for re-indexing and remote nodes can slow down performance.</p>
</li>
<li><strong>Step 3:</strong> Fetch the data for the .id namespace and respective profiles. Note, you may want to redirect stderr to a file, as there is a lot of debug output.</li>
</ul>
<div class="highlighter-rouge"><pre class="highlight"><code>$ cd api/
$ python -m search.fetch_data --fetch_namespace
$ python -m search.fetch_data --fetch_profiles
</code></pre>
</div>
<ul>
<li><strong>Step 4:</strong> Create the search index:</li>
</ul>
<div class="highlighter-rouge"><pre class="highlight"><code>python -m search.basic_index --refresh
</code></pre>
</div>
<ul>
<li><strong>Step 5:</strong> Enable search API endpoint:</li>
</ul>
<div class="highlighter-rouge"><pre class="highlight"><code>$ sed -i 's/SEARCH_API_ENDPOINT_ENABLED \= False/SEARCH_API_ENDPOINT_ENABLED \= True/' config.py
</code></pre>
</div>
<h1 id="usage">Usage</h1>
<p>You can quickly test the search index from the command line:</p>
<div class="highlighter-rouge"><pre class="highlight"><code>python -m search.substring_search --search_name "Fred Wil"
python -m search.substring_search --search_twitter fredwil
</code></pre>
</div>
<p>You can also use the search API end-point:</p>
<blockquote>
<p><code>curl -G {machine_ip}:port/search/name -d "query=muneeb"</code></p>
</blockquote>
<p>Sample Response:</p>
<div class="highlighter-rouge"><pre class="highlight"><code><span class="p">{</span><span class="w">
</span><span class="nt">"people"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span><span class="w">
</span><span class="p">{</span><span class="w">
</span><span class="nt">"profile"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span><span class="w">
</span><span class="nt">"website"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span><span class="w">
</span><span class="p">{</span><span class="w">
</span><span class="nt">"url"</span><span class="p">:</span><span class="w"> </span><span class="s2">"http://muneebali.com"</span><span class="p">,</span><span class="w">
</span><span class="nt">"@type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"WebSite"</span><span class="w">
</span><span class="p">}</span><span class="w">
</span><span class="p">],</span><span class="w">
</span><span class="nt">"name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"Muneeb Ali"</span><span class="p">,</span><span class="w">
</span><span class="nt">"address"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span><span class="w">
</span><span class="nt">"addressLocality"</span><span class="p">:</span><span class="w"> </span><span class="s2">"New York, NY"</span><span class="p">,</span><span class="w">
</span><span class="nt">"@type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"PostalAddress"</span><span class="w">
</span><span class="p">},</span><span class="w">
</span><span class="nt">"image"</span><span class="p">:</span><span class="w"> </span><span class="p">[</span><span class="w">
</span><span class="p">{</span><span class="w">
</span><span class="nt">"contentUrl"</span><span class="p">:</span><span class="w"> </span><span class="s2">"https://s3.amazonaws.com/dx3/muneeb"</span><span class="p">,</span><span class="w">
</span><span class="nt">"@type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"ImageObject"</span><span class="p">,</span><span class="w">
</span><span class="nt">"name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"cover"</span><span class="w">
</span><span class="p">},</span><span class="w">
</span><span class="p">{</span><span class="w">
</span><span class="nt">"contentUrl"</span><span class="p">:</span><span class="w"> </span><span class="s2">"https://s3.amazonaws.com/kd4/muneeb"</span><span class="p">,</span><span class="w">
</span><span class="nt">"@type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"ImageObject"</span><span class="p">,</span><span class="w">
</span><span class="nt">"name"</span><span class="p">:</span><span class="w"> </span><span class="s2">"avatar"</span><span class="w">
</span><span class="p">}</span><span class="w">
</span><span class="p">],</span><span class="w">
</span><span class="nt">"@type"</span><span class="p">:</span><span class="w"> </span><span class="s2">"Person"</span><span class="p">,</span><span class="w">
</span><span class="nt">"description"</span><span class="p">:</span><span class="w"> </span><span class="s2">"Co-founder of Blockstack. Interested in distributed systems and blockchains. Previously, PhD at Princeton."</span><span class="w">
</span><span class="p">},</span><span class="w">
</span><span class="nt">"username"</span><span class="p">:</span><span class="w"> </span><span class="s2">"muneeb"</span><span class="w">
</span><span class="p">},</span><span class="w">
</span><span class="p">{</span><span class="w">
</span><span class="nt">"profile"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span><span class="w">
</span><span class="nt">"message"</span><span class="p">:</span><span class="w"> </span><span class="s2">"This blockchain ID is reserved for Muneeb Ali. If this is you, please email support@onename.com to claim it for free."</span><span class="p">,</span><span class="w">
</span><span class="nt">"status"</span><span class="p">:</span><span class="w"> </span><span class="s2">"reserved"</span><span class="w">
</span><span class="p">},</span><span class="w">
</span><span class="nt">"username"</span><span class="p">:</span><span class="w"> </span><span class="s2">"muneebali"</span><span class="w">
</span><span class="p">},</span><span class="w">
</span><span class="p">{</span><span class="w">
</span><span class="nt">"profile"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span><span class="w">
</span><span class="nt">"cover"</span><span class="p">:</span><span class="w"> </span><span class="p">{</span><span class="w">
</span><span class="nt">"url"</span><span class="p">:</span><span class="w"> </span><span class="s2">"https://s3.amazonaws.com/97p/HHE.jpg"</span><span class="w">
</span><span class="p">},</span><span class="w">
</span><span class="nt">"v"</span><span class="p">:</span><span class="w"> </span><span class="s2">"0.2"</span><span class="w">
</span><span class="p">},</span><span class="w">
</span><span class="nt">"username"</span><span class="p">:</span><span class="w"> </span><span class="s2">"muneebali1"</span><span class="w">
</span><span class="p">}</span><span class="w">
</span><span class="p">]</span><span class="w">
</span><span class="p">}</span><span class="w">
</span></code></pre>
</div>
<h2 id="enabling-elastic-search">Enabling Elastic Search</h2>
<h3 id="requirements">Requirements:</h3>
<div class="highlighter-rouge"><pre class="highlight"><code>sudo apt-get install mongodb
sudo apt-get install memcached libmemcached-dev
sudo apt-get install python2.7-dev
pip install -r requirements.txt
</code></pre>
</div>
<h3 id="elastic-search">Elastic Search</h3>
<p>The Elastic Search library is not in GitHub; it resides at unix/lib/elastic.</p>
<p>The current version we’re using is <em>0.90.2</em>. Download it from:</p>
<blockquote>
<p>wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-0.90.2.zip</p>
</blockquote>
<p>Before installing pylibmc, make sure <a href="memcached.md">memcached</a> is installed.</p>
<p>Ensure that MongoDB and Elastic Search are running.
To start Elastic Search:</p>
<div class="highlighter-rouge"><pre class="highlight"><code>$elasticsearch (on mac)
bin/elasticsearch -d (on linux)
</code></pre>
</div>
<p>To test if elastic search is running:</p>
<blockquote>
<p>curl -X GET http://localhost:9200/</p>
</blockquote>
<p>returns:</p>
<div class="highlighter-rouge"><pre class="highlight"><code><span class="p">{</span><span class="w">
</span><span class="nt">"ok"</span><span class="w"> </span><span class="p">:</span><span class="w"> </span><span class="kc">true</span><span class="p">,</span><span class="w">
</span><span class="nt">"status"</span><span class="w"> </span><span class="p">:</span><span class="w"> </span><span class="mi">200</span><span class="p">,</span><span class="w">
</span><span class="nt">"name"</span><span class="w"> </span><span class="p">:</span><span class="w"> </span><span class="s2">"Angler"</span><span class="p">,</span><span class="w">
</span><span class="nt">"version"</span><span class="w"> </span><span class="p">:</span><span class="w"> </span><span class="p">{</span><span class="w">
</span><span class="nt">"number"</span><span class="w"> </span><span class="p">:</span><span class="w"> </span><span class="s2">"0.90.2"</span><span class="p">,</span><span class="w">
</span><span class="nt">"snapshot_build"</span><span class="w"> </span><span class="p">:</span><span class="w"> </span><span class="kc">false</span><span class="p">,</span><span class="w">
</span><span class="nt">"lucene_version"</span><span class="w"> </span><span class="p">:</span><span class="w"> </span><span class="s2">"4.3.1"</span><span class="w">
</span><span class="p">},</span><span class="w">
</span></code></pre>
</div>
<p>Create Index:</p>
<blockquote>
<p><code>python create_search_index.py --create_index</code></p>
</blockquote>
<!-- Share buttons. Each link works as a plain new-tab link; the onclick handler instead opens the same URL in a small popup window and cancels the default navigation. -->
<div class="share uk-text-center">
<!-- Query values are percent-encoded and separators written as &amp; so the URL is valid and the nested page URL is passed as a single parameter, matching the Facebook link below. -->
<a href="https://twitter.com/intent/tweet?text=How%20to%20build%20a%20Profile%20Search%20Index&amp;url=https%3A%2F%2Fzbabystack.netlify.com%2Fcore%2Fnaming%2Fsearch.html&amp;via=&amp;related=" rel="nofollow noopener" target="_blank" title="Share on Twitter" aria-label="Share on Twitter" onclick="window.open(this.href, 'twitter', 'width=550,height=235');return false;"><span data-uk-icon="icon: twitter; ratio: 1.2"></span></a>
<a class="uk-margin-small-left" href="https://www.facebook.com/sharer/sharer.php?u=https%3A%2F%2Fzbabystack.netlify.com%2Fcore%2Fnaming%2Fsearch.html" rel="nofollow noopener" target="_blank" title="Share on Facebook" aria-label="Share on Facebook" onclick="window.open(this.href, 'facebook-share','width=580,height=296');return false;"><span data-uk-icon="icon: facebook; ratio: 1.2"></span></a>
</div>
</div>
<hr class="uk-margin-medium">
<div class="uk-margin-large-top">
<h3>Related Articles</h3>
<ul class="uk-list">
</ul>
</div>
</article>
<script>
// Attach UIkit's scroll behaviour to every link inside the element with id "markdown-toc".
// The options are passed through unchanged; see the UIkit Scroll component for their meaning.
(function () {
  var tocLinkSelector = '#markdown-toc a';
  var tocScrollOptions = { duration: 400, offset: 120 };
  UIkit.scroll(tocLinkSelector, tocScrollOptions);
})();
</script>
</div>
</div>
</div>
<!-- Off-canvas menu, the target of the navbar toggle that links to #offcanvas: logo, external links and social icons. -->
<div id="offcanvas" data-uk-offcanvas="flip: true; overlay: true">
<div class="uk-offcanvas-bar">
<!-- Icon-only controls below carry aria-label so assistive technology can announce them. -->
<button class="uk-offcanvas-close" type="button" data-uk-close aria-label="Close menu"></button>
<ul class="uk-nav uk-nav-default">
<!-- <li><a class="uk-logo uk-margin-small-bottom" href="/"><img src="https://zbabystack.netlify.com/assets/posts/logo.png" alt="Docs"></a></li> -->
<li><a class="uk-logo uk-margin-small-bottom" href="/"><img src="/assets/posts/logo.png" alt="Docs"></a></li>
<li><a href="https://blockstack.org" target="_blank" rel="noopener">Blockstack.org</a></li>
<li><a href="https://forum.blockstack.org/" target="_blank" rel="noopener">Forums</a></li>
<li><a href="https://github.com/blockstack" target="_blank" rel="noopener">GitHub</a></li>
</ul>
<div class="uk-margin-small-top uk-text-center uk-text-muted uk-link-muted">
<div data-uk-grid class="uk-child-width-auto uk-grid-small uk-flex-center uk-grid">
<div class="uk-first-column">
<a href="https://twitter.com/" data-uk-icon="icon: twitter" class="uk-icon-link uk-icon" target="_blank" rel="noopener" aria-label="Twitter"></a>
</div>
<div>
<a href="https://www.facebook.com/" data-uk-icon="icon: facebook" class="uk-icon-link uk-icon" target="_blank" rel="noopener" aria-label="Facebook"></a>
</div>
<div>
<a href="https://www.instagram.com/" data-uk-icon="icon: instagram" class="uk-icon-link uk-icon" target="_blank" rel="noopener" aria-label="Instagram"></a>
</div>
<div>
<a href="https://vimeo.com/" data-uk-icon="icon: vimeo" class="uk-icon-link uk-icon" target="_blank" rel="noopener" aria-label="Vimeo"></a>
</div>
</div>
</div>
</div>
</div>
<footer class="uk-section uk-text-center uk-text-muted uk-link-muted">
<div class="uk-container uk-container-small">
<!-- <div>
<ul class="uk-subnav uk-flex-center">
<li><a href="https://blockstack.org" target="_blank" >Blockstack.org</a></li>
<li><a href="https://forum.blockstack.org/" target="_blank" >Forums</a></li>
<li><a href="https://github.com/blockstack" target="_blank" >GitHub</a></li>
</ul>
</div>
<div class="uk-margin-medium">
<div data-uk-grid class="uk-child-width-auto uk-grid-small uk-flex-center uk-grid">
<div class="uk-first-column">
<a href="https://twitter.com/" data-uk-icon="icon: twitter" class="uk-icon-link uk-icon" target="_blank"></a>
</div>
<div>
<a href="https://www.facebook.com/" data-uk-icon="icon: facebook" class="uk-icon-link uk-icon" target="_blank"></a>
</div>
<div>
<a href="https://www.instagram.com/" data-uk-icon="icon: instagram" class="uk-icon-link uk-icon" target="_blank"></a>
</div>
<div>
<a href="https://vimeo.com/" data-uk-icon="icon: vimeo" class="uk-icon-link uk-icon" target="_blank"></a>
</div>
</div>
</div> -->
<div class="uk-margin-medium uk-text-small copyright">&copy; 2018 Blockstack</div>
</div>
</footer>
<script>
/*
 * Global search configuration object.
 * NOTE(review): presumably read by the Site Search 360 script included right after this block; confirm against its documentation.
 */
var ss360Config = {
  /* Site identifier */
  siteId: 'blockstack',
  /* CSS selector of the search input this configuration applies to */
  searchBox: {
    selector: '#searchBox'
  }
};
</script>
<script src="https://cdn.sitesearch360.com/sitesearch360-v11.min.js" async></script>
</body>
</html>