<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	xmlns:georss="http://www.georss.org/georss" xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#" xmlns:media="http://search.yahoo.com/mrss/"
	>

<channel>
	<title>GigaOM &#187; Mapr</title>
	<atom:link href="http://gigaom.com/tag/mapr/feed/" rel="self" type="application/rss+xml" />
	<link>http://gigaom.com</link>
	<description></description>
	<lastBuildDate>Thu, 23 May 2013 06:02:56 +0000</lastBuildDate>
	<language>en</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>http://wordpress.com/</generator>
<cloud domain='gigaom.com' port='80' path='/?rsscloud=notify' registerProcedure='' protocol='http-post' />
<image>
		<url>http://0.gravatar.com/blavatar/0db8f6557d022075dbbf010c54d46d93?s=96&#038;d=http%3A%2F%2Fs2.wp.com%2Fi%2Fbuttonw-com.png</url>
		<title>GigaOM &#187; Mapr</title>
		<link>http://gigaom.com</link>
	</image>
	<atom:link rel="search" type="application/opensearchdescription+xml" href="http://gigaom.com/osd.xml" title="GigaOM" />
	<atom:link rel='hub' href='http://gigaom.com/?pushpress=hub'/>
		<item>
		<title>How data warehousing is now a cost-effective solution for businesses</title>
		<link>http://pro.gigaom.com/report/the-new-economics-of-enterprise-data-warehousing/</link>
		<comments>http://pro.gigaom.com/report/the-new-economics-of-enterprise-data-warehousing/#comments</comments>
		<pubDate>Mon, 13 May 2013 06:55:34 +0000</pubDate>
		<dc:creator>nraden</dc:creator>
				<category><![CDATA[Uncategorized]]></category>
		<category><![CDATA[ADAPA]]></category>
		<category><![CDATA[analytics]]></category>
		<category><![CDATA[apache-hadoop]]></category>
		<category><![CDATA[big data]]></category>
		<category><![CDATA[Business Intelligence]]></category>
		<category><![CDATA[clickstream analytics]]></category>
		<category><![CDATA[Cloud Computing]]></category>
		<category><![CDATA[cloud-infrastructure]]></category>
		<category><![CDATA[columnar databases]]></category>
		<category><![CDATA[data]]></category>
		<category><![CDATA[data analysis]]></category>
		<category><![CDATA[data management]]></category>
		<category><![CDATA[data storage]]></category>
		<category><![CDATA[data warehouse]]></category>
		<category><![CDATA[data warehousing]]></category>
		<category><![CDATA[data-analytics]]></category>
		<category><![CDATA[database]]></category>
		<category><![CDATA[database technology]]></category>
		<category><![CDATA[Database theory]]></category>
		<category><![CDATA[distributed processing]]></category>
		<category><![CDATA[EMC]]></category>
		<category><![CDATA[etl]]></category>
		<category><![CDATA[extraction transform load systems]]></category>
		<category><![CDATA[Ferrari]]></category>
		<category><![CDATA[file systems]]></category>
		<category><![CDATA[Greenplum]]></category>
		<category><![CDATA[Hadoop]]></category>
		<category><![CDATA[HDFS]]></category>
		<category><![CDATA[Hewlett-Packard]]></category>
		<category><![CDATA[high-speed technologies]]></category>
		<category><![CDATA[HP]]></category>
		<category><![CDATA[IBM]]></category>
		<category><![CDATA[information technology]]></category>
		<category><![CDATA[integrated circuit]]></category>
		<category><![CDATA[Linux]]></category>
		<category><![CDATA[Mapr]]></category>
		<category><![CDATA[mobile devices]]></category>
		<category><![CDATA[Moore's Law]]></category>
		<category><![CDATA[Netezza]]></category>
		<category><![CDATA[Open Source Software]]></category>
		<category><![CDATA[parallel processing]]></category>
		<category><![CDATA[relational-databases]]></category>
		<category><![CDATA[scalability]]></category>
		<category><![CDATA[social networks]]></category>
		<category><![CDATA[storage devices]]></category>
		<category><![CDATA[storage virtualization technologies]]></category>
		<category><![CDATA[System administration]]></category>
		<category><![CDATA[tco]]></category>
		<category><![CDATA[total-cost-of-ownership]]></category>
		<category><![CDATA[Transaction processing]]></category>
		<category><![CDATA[Truviso]]></category>
		<category><![CDATA[Vertica]]></category>
		<category><![CDATA[Virtualization technology]]></category>

		<guid isPermaLink="false">http://pro.gigaom.com/?post_type=go-report&#038;p=175747/</guid>
		<description><![CDATA[Data-warehouse providers are quickly adding Hadoop distributions, or even their own versions of Hadoop, into their architecture, adding further cost advantages to collections of extremely large data sets. Finding the talent to manage this newly converged environment will not be easy, but it presents tremendous opportunity for companies willing to take some risk.<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=648494&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
				<content:encoded><![CDATA[<p>The new economics of data warehousing provide attractive alternatives in both costs and benefits. While big data gets most of the attention, evolved data warehousing will play an important role for the foreseeable future. In order to be relevant, data-warehouse design and operation need to be simplified, taking advantage of greatly improved hardware, software, and methods.</p>
<br />  <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=648494&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" /><p><a href="http://pubads.g.doubleclick.net/gampad/jump?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=625208"><img src="http://pubads.g.doubleclick.net/gampad/ad?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=625208" /></a></p><p><strong>Related research and analysis from GigaOM Pro:</strong><br />Subscriber content. <a href="http://pro.gigaom.com/?utm_medium=editorial&utm_campaign=auto3&utm_term=648494+the-new-economics-of-enterprise-data-warehousing&utm_content=nraden">Sign up for a free trial</a>.</p><ul><li><a href="http://pro.gigaom.com/2012/04/infrastructure-q1-cloud-and-big-data-woo-the-enterprise/?utm_medium=editorial&utm_campaign=auto3&utm_term=648494+the-new-economics-of-enterprise-data-warehousing&utm_content=nraden">Infrastructure Q1: Cloud and big data woo enterprises</a></li><li><a href="http://pro.gigaom.com/2012/07/cloud-and-data-second-quarter-2012-analysis-and-outlook-2/?utm_medium=editorial&utm_campaign=auto3&utm_term=648494+the-new-economics-of-enterprise-data-warehousing&utm_content=nraden">Takeaways from the second quarter in cloud and data</a></li><li><a href="http://pro.gigaom.com/2011/04/infrastructure-q1-iaas-comes-down-to-earth-big-data-takes-flight/?utm_medium=editorial&utm_campaign=auto3&utm_term=648494+the-new-economics-of-enterprise-data-warehousing&utm_content=nraden">Infrastructure Q1: IaaS Comes Down to Earth; Big Data Takes Flight</a></li></ul>]]></content:encoded>
			<wfw:commentRss>http://pro.gigaom.com/report/the-new-economics-of-enterprise-data-warehousing/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:thumbnail url="https://gigaom-pro-files.s3.amazonaws.com/files/2012/08/datacenter1.jpg?w=150" />
		<media:content url="https://gigaom-pro-files.s3.amazonaws.com/files/2012/08/datacenter1.jpg?w=150" medium="image">
			<media:title type="html">datacenter1</media:title>
		</media:content>

		<media:content url="http://0.gravatar.com/avatar/fdbbd80432b14e9d84aa12c6fc0cce24?s=96&#38;d=retro&#38;r=PG" medium="image">
			<media:title type="html">nraden</media:title>
		</media:content>
	</item>
		<item>
		<title>MapR releases M7, its commercial HBase distro</title>
		<link>http://gigaom.com/2013/05/01/mapr-releases-m7-its-commercial-hbase-distro/</link>
		<comments>http://gigaom.com/2013/05/01/mapr-releases-m7-its-commercial-hbase-distro/#comments</comments>
		<pubDate>Wed, 01 May 2013 23:21:07 +0000</pubDate>
		<dc:creator>Derrick Harris</dc:creator>
				<category><![CDATA[big data]]></category>
		<category><![CDATA[Databases]]></category>
		<category><![CDATA[Hadoop]]></category>
		<category><![CDATA[Hbase]]></category>
		<category><![CDATA[Mapr]]></category>
		<category><![CDATA[NoSQL]]></category>
		<category><![CDATA[open source]]></category>

		<guid isPermaLink="false">http://gigaom.com/?p=641425</guid>
		<description><![CDATA[MapR on Wednesday released its commercial version of HBase called M7, the first such product on the market, that the company claims is bigger, faster and better than the open source version.<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=641425&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
				<content:encoded><![CDATA[<p>MapR didn&#8217;t miss the memo about the key to success in the Hadoop space being the creation of a data platform that can do many things. And on Wednesday, the company released its take on HBase, <a href="http://www.mapr.com/products/mapr-editions/m7-edition">called M7.</a></p>
<p>Last week, I <a href="http://gigaom.com/2013/04/22/how-hbase-converted-myspaces-mysql-champion-and-is-driving-hadoop-mainstream/">explained how HBase is fast becoming the star of the Hadoop ecosystem</a> because it allows users to build more real-time, almost transactional applications on top of Hadoop. True to its form with its other products, MapR has taken HBase even further with M7 by promising greater availability (99.999 percent), instant recovery, faster operations and the ability to handle 1 trillion tables in a single cluster. In open source versions of HBase, MapR VP of Marketing Jack Norris told me, the accepted table limit per cluster is several hundred.</p>
<p><a href="http://gigaom2.files.wordpress.com/2013/05/m7.jpg"><img  alt="m7" src="http://gigaom2.files.wordpress.com/2013/05/m7.jpg?w=300&#038;h=265" width="300" height="265" class="alignright size-medium wp-image-641471" /></a>Additionally, M7 shares a single data layer with the Hadoop file system, meaning less performance overhead and, presumably, easier management.</p>
<p>As we&#8217;re seeing with other Hadoop vendors, including Cloudera (which <a href="http://gigaom.com/2013/04/30/with-impala-now-ga-clouderas-ceo-sizes-up-the-sql-on-hadoop-market/">released its Impala SQL query engine on Tuesday</a>), the Hadoop market is fast becoming one where each vendor is trying to set itself apart from the rest by building the best platform with the broadest set of capabilities. In furtherance of that mission, MapR also announced on Wednesday full-text search on its Hadoop distribution thanks to a partnership with Lucene specialist LucidWorks. It already has its own Hadoop distribution complete with proprietary code to bolster the file system and speed up MapReduce, as well as an <a href="http://gigaom.com/2012/08/17/for-fast-interactive-hadoop-queries-drill-may-be-the-answer/">open source SQL-on-Hadoop project called Drill</a> in the works.</p>
<p>MapR employees are probably sleeping a lot easier these days as a result of this platform push. Others in the Hadoop market used to talk about the fear of fragmentation and then point at MapR as the example of a company helping foment that outcome with its proprietary software. Now, however, even if everyone else is building open source products, they&#8217;re all still backing their own and largely dismissing the others.</p>
<p>I suspect the result is feature lock-in even if there&#8217;s no technological lock-in, kind of <a href="http://gigaom.com/2011/03/16/how-amazon-is-following-apples-lead-to-rule-cloud-computing/">like using Amazon Web Services for cloud computing</a> and then hoping to replicate its various services elsewhere. It might be easy enough to move your data, but impossible or very difficult to replicate those additional capabilities elsewhere. If MapR can build a better version of HBase and companies are willing to pay for it, then so be it.</p>
<br />  <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=641425&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" /><p><a href="http://pubads.g.doubleclick.net/gampad/jump?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=534988"><img src="http://pubads.g.doubleclick.net/gampad/ad?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=534988" /></a></p><p><strong>Related research and analysis from GigaOM Pro:</strong><br />Subscriber content. <a href="http://pro.gigaom.com/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=641425+mapr-releases-m7-its-commercial-hbase-distro&utm_content=dharrisstructure">Sign up for a free trial</a>.</p><ul><li><a href="http://pro.gigaom.com/2012/04/infrastructure-q1-cloud-and-big-data-woo-the-enterprise/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=641425+mapr-releases-m7-its-commercial-hbase-distro&utm_content=dharrisstructure">Infrastructure Q1: Cloud and big data woo enterprises</a></li><li><a href="http://pro.gigaom.com/2011/03/defining-hadoop-the-players-technologies-and-challenges-of-2011/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=641425+mapr-releases-m7-its-commercial-hbase-distro&utm_content=dharrisstructure">Defining Hadoop: the Players, Technologies and Challenges of 2011</a></li><li><a href="http://pro.gigaom.com/2012/12/big-data-2013-key-trends-and-companies-to-watch/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=641425+mapr-releases-m7-its-commercial-hbase-distro&utm_content=dharrisstructure">Big data 2013: key trends and companies to watch</a></li></ul>]]></content:encoded>
			<wfw:commentRss>http://gigaom.com/2013/05/01/mapr-releases-m7-its-commercial-hbase-distro/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:thumbnail url="http://gigaom2.files.wordpress.com/2012/09/shutterstock_110961494.jpg?w=150" />
		<media:content url="http://gigaom2.files.wordpress.com/2012/09/shutterstock_110961494.jpg?w=150" medium="image">
			<media:title type="html">Database rows</media:title>
		</media:content>

		<media:content url="http://0.gravatar.com/avatar/9e48ffa0913f65c577727457dd63023f?s=96&#38;d=retro&#38;r=PG" medium="image">
			<media:title type="html">dharrisstructure</media:title>
		</media:content>

		<media:content url="http://gigaom2.files.wordpress.com/2013/05/m7.jpg?w=300" medium="image">
			<media:title type="html">m7</media:title>
		</media:content>
	</item>
		<item>
		<title>With Impala now GA, Cloudera&#8217;s CEO sizes up the SQL-on-Hadoop market</title>
		<link>http://gigaom.com/2013/04/30/with-impala-now-ga-clouderas-ceo-sizes-up-the-sql-on-hadoop-market/</link>
		<comments>http://gigaom.com/2013/04/30/with-impala-now-ga-clouderas-ceo-sizes-up-the-sql-on-hadoop-market/#comments</comments>
		<pubDate>Tue, 30 Apr 2013 13:00:40 +0000</pubDate>
		<dc:creator>Derrick Harris</dc:creator>
				<category><![CDATA[big data]]></category>
		<category><![CDATA[Cloudera]]></category>
		<category><![CDATA[data warehouse]]></category>
		<category><![CDATA[Greenplum]]></category>
		<category><![CDATA[Hadoop]]></category>
		<category><![CDATA[hive]]></category>
		<category><![CDATA[Hortonworks]]></category>
		<category><![CDATA[Impala]]></category>
		<category><![CDATA[Mapr]]></category>
		<category><![CDATA[open source]]></category>
		<category><![CDATA[SQL]]></category>
		<category><![CDATA[SQL on Hadoop]]></category>

		<guid isPermaLink="false">http://gigaom.com/?p=640777</guid>
		<description><![CDATA[Cloudera's Impala engine for interactive SQL queries on Hadoop data is now generally available, and CEO Mike Olson gives his lay of the competitive landscape.<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=640777&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
				<content:encoded><![CDATA[<p>There is no shortage of confidence in the Hadoop space, and market leader Cloudera bolstered its own on Tuesday with the general availability of its Impala SQL query engine for Hadoop. And if CEO Mike Olson&#8217;s comments are any indication, we&#8217;re in for a long ride of competitive jockeying and oneupmanship as Cloudera and its peers go all Microsoft or Google and create myriad new data-processing engines to turn their Hadoop distributions into bona fide platforms.</p>
<p>Launched as a private beta in May 2012 and <a href="http://gigaom.com/2012/10/24/cloudera-makes-sql-a-first-class-citizen-in-hadoop/">made public in October</a>, Impala is Cloudera&#8217;s attempt to address the growing demand for interactive SQL analytics on Hadoop data. It&#8217;s essentially a massively parallel database designed to share the same storage platform and metadata as Hadoop MapReduce, only it is its own separate processing engine.</p>
<div id="attachment_640848" class="wp-caption alignleft" style="width: 310px"><a href="http://gigaom2.files.wordpress.com/2013/04/impala-arch-new.jpg"><img  alt="How Impala fits in" src="http://gigaom2.files.wordpress.com/2013/04/impala-arch-new.jpg?w=300&#038;h=257" width="300" height="257" class="size-medium wp-image-640848" /></a><p class="wp-caption-text">How Impala fits in</p></div>
<p>Impala actually uses the same &#8220;nearly ANSI&#8221; version of SQL as does current standard bearer Hive, but that technology (created by Facebook in 2009 as a data warehouse layer for Hadoop) doesn&#8217;t run nearly fast enough to sate many users&#8217; desire for interactive analytics. This is because Hive transforms SQL queries into MapReduce jobs, meaning every one is processed against the entire corpus of data in the Hadoop Distributed File System.</p>
<h2 id="sizing-up-the-competition">Sizing up the competition</h2>
<p>Only Cloudera isn&#8217;t the first to have the idea, <a href="http://gigaom.com/2013/02/21/sql-is-whats-next-for-hadoop-heres-whos-doing-it/">nor is it alone in trying to sell interactive SQL on Hadoop</a>. The idea was <a href="http://gigaom.com/2011/10/21/hadapt-raises-9-5m-for-hadoop-data-warehouse/">first commercialized by Boston-based startup Hadapt</a> in 2011, and is now being pushed by numerous startups and larger Hadoop players. Among them: Pivotal (formerly EMC) Greenplum, MapR (with <a href="http://gigaom.com/2012/08/17/for-fast-interactive-hadoop-queries-drill-may-be-the-answer/">Drill</a>), Hortonworks (with <a href="http://hortonworks.com/blog/100x-faster-hive/">Stinger</a>), Drawn to Scale, Splice Machine, Jethro Data and Citus Data.</p>
<div id="attachment_640858" class="wp-caption aligncenter" style="width: 600px"><a href="http://gigaom2.files.wordpress.com/2013/04/had_graphic2-scaled.jpg"><img  alt="Hadapt's architecture" src="http://gigaom2.files.wordpress.com/2013/04/had_graphic2-scaled.jpg?w=708"   class="size-full wp-image-640858" /></a><p class="wp-caption-text">Hadapt&#8217;s architecture</p></div>
<p>But Cloudera is arguably the biggest name pushing SQL on Hadoop, and CEO Mike Olson thinks Impala stands out for several reasons &#8212; not the least of which is that it exists as a product. &#8220;Nobody else is shipping production-grade SQL query support on Hadoop,&#8221; he told me during a recent call. &#8220;At least not in open source.&#8221; He seems content to let the startups do their things, instead focusing his attention on Cloudera&#8217;s big three Hadoop-distribution competitors in Pivotal, MapR and Hortonworks. Greenplum and Pivotal SVP Scott Yara <a href="http://gigaom.com/2013/02/25/emc-to-hadoop-competition-see-ya-wouldnt-wanna-be-ya/">was full of confidence &#8212; and R&amp;D budget</a> &#8212; when the company announced the Pivotal HD distribution and HAWQ technology in February, but Olson claims the approach requires a siloed DBMS within HDFS and is a &#8220;rearguard defensive strategy&#8221; to protect the company&#8217;s sunk costs in its database technology.</p>
<div id="attachment_615210" class="wp-caption aligncenter" style="width: 718px"><a href="http://gigaom2.files.wordpress.com/2013/02/hawq1.jpg"><img  alt="The Pivotal HD and Hawq architecture" src="http://gigaom2.files.wordpress.com/2013/02/hawq1.jpg?w=708&#038;h=387" width="708" height="387" class="size-large wp-image-615210" /></a><p class="wp-caption-text">The Pivotal HD and Hawq architecture</p></div>
<p>As for Hortonworks, Olson questions the wisdom of its Stinger initiative to boost Hive&#8217;s speed, noting that &#8220;Hive never got good while it was running standalone on MapReduce.&#8221; Hortonworks also <a href="http://gigaom.com/2013/04/15/teradata-to-connect-hadoop-and-data-warehouses-roll-out-new-appliance/">partners with vendors such as Teradata</a> to let their platforms access Hadoop data in its native format, but those approaches still require sending data over the network. &#8220;It&#8217;s not the way you would build it if you woke up in the 2000s and were building this anew,&#8221; Olson said.</p>
<div id="attachment_640854" class="wp-caption aligncenter" style="width: 718px"><a href="http://gigaom2.files.wordpress.com/2013/04/stingerroad.png"><img  alt="The Stinger roadmap" src="http://gigaom2.files.wordpress.com/2013/04/stingerroad.png?w=708&#038;h=558" width="708" height="558" class="size-large wp-image-640854" /></a><p class="wp-caption-text">The Stinger roadmap</p></div>
<p>Olson acknowledged that the MapR-led Apache Drill project is cut from the same cloth as Impala (that is, being a Google Dremel clone designed specifically for Hadoop), but &#8220;the difference is we&#8217;re shipping code.&#8221; Being generally available and ready for production workloads means Cloudera can lock down users and market share before many even have a chance to experiment with Drill. He all but dismissed questions over the readiness of Impala, spurred by rumblings in the Hadoop space that Cloudera rushed it into public beta in order to get on the scoreboard against more fully baked offerings.</p>
<p>&#8220;I don&#8217;t feel we&#8217;re under the gun competitively to pull it out of beta because no one else has product in the market,&#8221; Olson said. &#8220;I have no problems &#8230; calling this GA quality.&#8221; He did, however, acknowledge that Impala is shipping with a &#8220;minimum viable feature set&#8221; that the company has plans to build on in the near future. Impala Senior Product Manager Justin Erickson noted a few issues of concern, including around the number of concurrent users Impala can support, but said they have been addressed during the beta period.</p>
<h2 id="one-piece-of-a-larger-platform">One piece of a larger platform</h2>
<p>Really, though, the whole point of Impala and its competitors is to turn Hadoop from a tool for batch analytics and mass storage <a href="http://gigaom.com/2013/03/07/5-reasons-why-the-future-of-hadoop-is-real-time-relatively-speaking/">into a platform that can handle nearly all of companies&#8217; data-processing needs</a>. In that regard, it appears we&#8217;re just getting started. Cloudera, MapR, Pivotal Greenplum and Hortonworks are already pushing their own products and projects, and Olson said &#8220;it&#8217;s absolutely our intent&#8221; to enhance Cloudera&#8217;s platform with even more open-source products &#8212; perhaps even more database technologies <a href="http://gigaom.com/2013/04/22/how-hbase-converted-myspaces-mysql-champion-and-is-driving-hadoop-mainstream/">a la HBase</a> &#8212; that will let users do more stuff with more types of data. Over time, this strategy could result in Hadoop displacing the current breed of databases and data warehouses and becoming the single data store atop of which users run whatever applications they so desire. For now, though, especially when it comes to Impala and the data warehouse incumbents, Olson is taking a measured approach. &#8220;The likelihood that we&#8217;re going to knock them off in the near term,&#8221; he said, &#8220;&#8230; it would be a tough fight to win.&#8221;</p>
<br />  <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=640777&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" /><p><a href="http://pubads.g.doubleclick.net/gampad/jump?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=755531"><img src="http://pubads.g.doubleclick.net/gampad/ad?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=755531" /></a></p><p><strong>Related research and analysis from GigaOM Pro:</strong><br />Subscriber content. <a href="http://pro.gigaom.com/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=640777+with-impala-now-ga-clouderas-ceo-sizes-up-the-sql-on-hadoop-market&utm_content=dharrisstructure">Sign up for a free trial</a>.</p><ul><li><a href="http://pro.gigaom.com/2012/03/a-near-term-outlook-for-big-data/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=640777+with-impala-now-ga-clouderas-ceo-sizes-up-the-sql-on-hadoop-market&utm_content=dharrisstructure">A near-term outlook for big data</a></li><li><a href="http://pro.gigaom.com/2012/04/infrastructure-q1-cloud-and-big-data-woo-the-enterprise/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=640777+with-impala-now-ga-clouderas-ceo-sizes-up-the-sql-on-hadoop-market&utm_content=dharrisstructure">Infrastructure Q1: Cloud and big data woo enterprises</a></li><li><a href="http://pro.gigaom.com/2012/04/sector-roadmap-hadoop-platforms-2012/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=640777+with-impala-now-ga-clouderas-ceo-sizes-up-the-sql-on-hadoop-market&utm_content=dharrisstructure">2012: The Hadoop infrastructure market booms</a></li></ul>]]></content:encoded>
			<wfw:commentRss>http://gigaom.com/2013/04/30/with-impala-now-ga-clouderas-ceo-sizes-up-the-sql-on-hadoop-market/feed/</wfw:commentRss>
		<slash:comments>1</slash:comments>
	
		<media:thumbnail url="http://gigaom2.files.wordpress.com/2013/01/1z5o1503.jpg?w=150" />
		<media:content url="http://gigaom2.files.wordpress.com/2013/01/1z5o1503.jpg?w=150" medium="image">
			<media:title type="html">Structure Data 2012: Michael Olson – CEO, Cloudera</media:title>
		</media:content>

		<media:content url="http://0.gravatar.com/avatar/9e48ffa0913f65c577727457dd63023f?s=96&#38;d=retro&#38;r=PG" medium="image">
			<media:title type="html">dharrisstructure</media:title>
		</media:content>

		<media:content url="http://gigaom2.files.wordpress.com/2013/04/impala-arch-new.jpg?w=300" medium="image">
			<media:title type="html">How Impala fits in</media:title>
		</media:content>

		<media:content url="http://gigaom2.files.wordpress.com/2013/04/had_graphic2-scaled.jpg" medium="image">
			<media:title type="html">Hadapt&#039;s architecture</media:title>
		</media:content>

		<media:content url="http://gigaom2.files.wordpress.com/2013/02/hawq1.jpg?w=708" medium="image">
			<media:title type="html">The Pivotal HD and Hawq architecture</media:title>
		</media:content>

		<media:content url="http://gigaom2.files.wordpress.com/2013/04/stingerroad.png?w=708" medium="image">
			<media:title type="html">The Stinger roadmap</media:title>
		</media:content>
	</item>
		<item>
		<title>Cloud and data first-quarter 2013: analysis and outlook</title>
		<link>http://pro.gigaom.com/report/cloud-and-data-first-quarter-2013-analysis-and-outlook/</link>
		<comments>http://pro.gigaom.com/report/cloud-and-data-first-quarter-2013-analysis-and-outlook/#comments</comments>
		<pubDate>Tue, 09 Apr 2013 06:55:36 +0000</pubDate>
		<dc:creator>David S. Linthicum</dc:creator><!-- author profile: http://pro.gigaom.com/members/davidlinthicum/ -->
				<category><![CDATA[Uncategorized]]></category>
		<category><![CDATA[Amazon]]></category>
		<category><![CDATA[Amazon cloud computing]]></category>
		<category><![CDATA[Amazon Web Services]]></category>
		<category><![CDATA[amazon-elastic-compute-cloud]]></category>
		<category><![CDATA[Amazon.com]]></category>
		<category><![CDATA[apache-hadoop]]></category>
		<category><![CDATA[apple inc.]]></category>
		<category><![CDATA[aws]]></category>
		<category><![CDATA[Azure Services Platform]]></category>
		<category><![CDATA[big data]]></category>
		<category><![CDATA[central-intelligence-agency]]></category>
		<category><![CDATA[Centralized computing]]></category>
		<category><![CDATA[CIA]]></category>
		<category><![CDATA[Cisco]]></category>
		<category><![CDATA[Cisco Systems]]></category>
		<category><![CDATA[Client/Server]]></category>
		<category><![CDATA[Cloud]]></category>
		<category><![CDATA[Cloud Computing]]></category>
		<category><![CDATA[cloud computing services]]></category>
		<category><![CDATA[Cloud computing taxes]]></category>
		<category><![CDATA[Cloud Storage]]></category>
		<category><![CDATA[cloud storage services]]></category>
		<category><![CDATA[cloud technology]]></category>
		<category><![CDATA[cloud-applications]]></category>
		<category><![CDATA[cloud-based storage services]]></category>
		<category><![CDATA[cloud-infrastructure]]></category>
		<category><![CDATA[Cloudera]]></category>
		<category><![CDATA[CloudMe]]></category>
		<category><![CDATA[computing]]></category>
		<category><![CDATA[consumer-oriented cloud storage services]]></category>
		<category><![CDATA[data management]]></category>
		<category><![CDATA[data processing store]]></category>
		<category><![CDATA[Data Synchronization]]></category>
		<category><![CDATA[database management systems]]></category>
		<category><![CDATA[database technology]]></category>
		<category><![CDATA[DataDirect Networks]]></category>
		<category><![CDATA[Datameer]]></category>
		<category><![CDATA[Dropbox]]></category>
		<category><![CDATA[EMC]]></category>
		<category><![CDATA[file hosting]]></category>
		<category><![CDATA[File system-sharing services]]></category>
		<category><![CDATA[firewall]]></category>
		<category><![CDATA[Google]]></category>
		<category><![CDATA[Greenplum]]></category>
		<category><![CDATA[Hadoop]]></category>
		<category><![CDATA[Hewlett-Packard]]></category>
		<category><![CDATA[HP]]></category>
		<category><![CDATA[IBM]]></category>
		<category><![CDATA[icloud]]></category>
		<category><![CDATA[Idaho State Tax Commission]]></category>
		<category><![CDATA[Income taxes]]></category>
		<category><![CDATA[Intel]]></category>
		<category><![CDATA[iPad]]></category>
		<category><![CDATA[iPhone]]></category>
		<category><![CDATA[Joyent]]></category>
		<category><![CDATA[Linux]]></category>
		<category><![CDATA[Macquarie Capital]]></category>
		<category><![CDATA[Mapr]]></category>
		<category><![CDATA[massively parallel processing]]></category>
		<category><![CDATA[Microsoft]]></category>
		<category><![CDATA[microsoft-windows]]></category>
		<category><![CDATA[mobile device]]></category>
		<category><![CDATA[MongoDB]]></category>
		<category><![CDATA[Nimbula]]></category>
		<category><![CDATA[NoSQL]]></category>
		<category><![CDATA[ObjectRocket]]></category>
		<category><![CDATA[Online backup services]]></category>
		<category><![CDATA[open source]]></category>
		<category><![CDATA[OpenStack]]></category>
		<category><![CDATA[Oracle]]></category>
		<category><![CDATA[oracle-corporation]]></category>
		<category><![CDATA[oracle-database]]></category>
		<category><![CDATA[parallel processing]]></category>
		<category><![CDATA[private clouds]]></category>
		<category><![CDATA[Public Clouds]]></category>
		<category><![CDATA[Rackspace]]></category>
		<category><![CDATA[Relational database]]></category>
		<category><![CDATA[relational database management systems]]></category>
		<category><![CDATA[saleseforce-com]]></category>
		<category><![CDATA[Salesforce.com]]></category>
		<category><![CDATA[SAN]]></category>
		<category><![CDATA[smartphone]]></category>
		<category><![CDATA[smartphones]]></category>
		<category><![CDATA[software]]></category>
		<category><![CDATA[software delivery]]></category>
		<category><![CDATA[SQL]]></category>
		<category><![CDATA[Star Analytics]]></category>
		<category><![CDATA[storage-area-network]]></category>
		<category><![CDATA[Tablet computer]]></category>
		<category><![CDATA[Teradata]]></category>
		<category><![CDATA[U.S. government]]></category>

		<guid isPermaLink="false">http://pro.gigaom.com/?post_type=go-report&#038;p=173124/</guid>
		<description><![CDATA[Cloud computing is finally starting to add value to business, as those in charge of cloud within enterprises are moving from talking to doing. That much was very evident in the first quarter of 2013.<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=648537&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
				<content:encoded><![CDATA[<p>Cloud computing is finally starting to add value to business, as those in charge of cloud within enterprises are moving from talking to doing. That much was very evident in the first quarter of 2013.</p>
<br />  <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=648537&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" /><p><a href="http://pubads.g.doubleclick.net/gampad/jump?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=852004"><img src="http://pubads.g.doubleclick.net/gampad/ad?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=852004" /></a></p><p><strong>Related research and analysis from GigaOM Pro:</strong><br />Subscriber content. <a href="http://pro.gigaom.com/?utm_medium=editorial&utm_campaign=auto3&utm_term=648537+cloud-and-data-first-quarter-2013-analysis-and-outlook&utm_content=gigaedit">Sign up for a free trial</a>.</p><ul><li><a href="http://pro.gigaom.com/2011/04/infrastructure-q1-iaas-comes-down-to-earth-big-data-takes-flight/?utm_medium=editorial&utm_campaign=auto3&utm_term=648537+cloud-and-data-first-quarter-2013-analysis-and-outlook&utm_content=gigaedit">Infrastructure Q1: IaaS Comes Down to Earth; Big Data Takes Flight</a></li><li><a href="http://pro.gigaom.com/2012/04/infrastructure-q1-cloud-and-big-data-woo-the-enterprise/?utm_medium=editorial&utm_campaign=auto3&utm_term=648537+cloud-and-data-first-quarter-2013-analysis-and-outlook&utm_content=gigaedit">Infrastructure Q1: Cloud and big data woo enterprises</a></li><li><a href="http://pro.gigaom.com/2011/07/infrastructure-q2-big-data-and-paas-gain-more-momentum/?utm_medium=editorial&utm_campaign=auto3&utm_term=648537+cloud-and-data-first-quarter-2013-analysis-and-outlook&utm_content=gigaedit">Infrastructure Q2: Big data and PaaS gain more momentum</a></li></ul>]]></content:encoded>
			<wfw:commentRss>http://pro.gigaom.com/report/cloud-and-data-first-quarter-2013-analysis-and-outlook/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:thumbnail url="http://pro.gigaom.com/files/2009/04/gigaompromasterimagecloud.jpg?w=150" />
		<media:content url="http://pro.gigaom.com/files/2009/04/gigaompromasterimagecloud.jpg?w=150" medium="image">
			<media:title type="html">gigaompromasterimagecloud</media:title>
		</media:content>

		<media:content url="http://1.gravatar.com/avatar/4f3860069d181dbeeb398304f5940a9e?s=96&#38;d=retro&#38;r=PG" medium="image">
			<media:title type="html">gigaedit</media:title>
		</media:content>
	</item>
		<item>
		<title>Big, open data: MapR on Github and Yelp&#8217;s dataset challenge</title>
		<link>http://gigaom.com/2013/03/28/big-open-data-mapr-on-github-and-yelps-dataset-challenge/</link>
		<comments>http://gigaom.com/2013/03/28/big-open-data-mapr-on-github-and-yelps-dataset-challenge/#comments</comments>
		<pubDate>Thu, 28 Mar 2013 16:57:42 +0000</pubDate>
		<dc:creator>Derrick Harris</dc:creator>
				<category><![CDATA[big data]]></category>
		<category><![CDATA[data science]]></category>
		<category><![CDATA[github]]></category>
		<category><![CDATA[Hadoop]]></category>
		<category><![CDATA[Mapr]]></category>
		<category><![CDATA[Open Data]]></category>
		<category><![CDATA[open source]]></category>
		<category><![CDATA[yelp]]></category>

		<guid isPermaLink="false">http://gigaom.com/?p=625286</guid>
		<description><![CDATA[MapR is releasing open source code and partnering with Canonical on Ubuntu, while Yelp is releasing some data for developers to play with. Sounds like a good day for openness.<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=625286&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
				<content:encoded><![CDATA[<p>If you&#8217;re into open source, or at least open data, today is a good day. Hadoop vendor MapR has open sourced a portion of its source code <a href="https://github.com/mapr/">on Github</a> and <a href="http://repository.mapr.com/maven/">Maven</a>, while Yelp has released a sample of its data as <a href="http://www.yelp.com/dataset_challenge/">part of a $5,000 challenge</a> to find the most-innovative use for it.</p>
<p>MapR&#8217;s decision to open source parts of its code is significant, but not groundbreaking. The company is only releasing its improvements to a handful of Hadoop-related Apache projects that are included in the MapR distribution of Hadoop, but not the proprietary code that&#8217;s MapR&#8217;s real competitive advantage in the contentious Hadoop market. While it&#8217;s still not flying the all-open-source banner like Hortonworks is, the code release puts MapR more on par with competitor Cloudera, which bolsters its open source aspects with some proprietary software for managing Hadoop clusters.</p>
<p>MapR also took another step in the open source direction on Thursday, announcing a partnership with Canonical that integrates MapR&#8217;s M3 distribution with the Ubuntu Linux operating system. The two also have plans to ease the installation of MapR&#8217;s Hadoop software on OpenStack-based cloud infrastructure.</p>
<p>I wrote recently <a href="http://gigaom.com/2013/03/18/in-battle-for-hadoop-mapr-raises-30m/">in relation to MapR&#8217;s $30 million VC investment</a> that the company is in a tricky position when it comes to open source. The Hadoop ecosystem was <a href="http://gigaom.com/2013/03/04/the-history-of-hadoop-from-4-nodes-to-the-future-of-data/">built on open source and still values it immensely</a>, but some customers are definitely willing to pay money for products that deliver the features they want, open source or not.</p>
<p>As for Yelp, well, it&#8217;s just following in the footsteps of many companies &#8212; <a href="http://gigaom.com/2009/07/27/why-the-netflix-prize-is-a-kind-of-a-big-deal/">Netflix</a> and everyone doing something on Kaggle (<a href="https://www.kaggle.com/c/predict-wordpress-likes/forums/t/2738/splunk-innovation-prize-results/14720">including GigaOM</a>) &#8212; in trying to find new ways to use its data. The data set it&#8217;s releasing is from the Phoenix, Ariz., area and includes 11,537 businesses, 8,282 checkin sets, 43,873 users and 229,907 reviews. The deadline for entries is May 20, and they can be submitted in pretty much any form you can imagine.</p>
<p>Hopefully, for Yelp&#8217;s sake, it doesn&#8217;t step in it the way other companies &#8212; <a href="http://gigaom.com/2010/03/12/netflix-cancels-recommendation-engine-contest-settles-privacy-lawsuit/">including Netflix</a> and AOL &#8212; have when they released supposedly anonymous data sets that were later de-anonymized. Releasing data sets gives clear benefits to both the source companies <a href="http://gigaom.com/2012/05/24/in-social-data-a-fight-between-science-and-privacy/">and institutions or individuals accessing the data</a>, but privacy snafus have a way of sneaking up and mitigating some of the goodwill.</p>
<p><em>Feature image courtesy of <a href="http://www.shutterstock.com/gallery-249574p1.html">Shutterstock user Jakub Krechowicz</a>.</em></p>
<br />  <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=625286&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" /><p><a href="http://pubads.g.doubleclick.net/gampad/jump?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=921914"><img src="http://pubads.g.doubleclick.net/gampad/ad?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=921914" /></a></p><p><strong>Related research and analysis from GigaOM Pro:</strong><br />Subscriber content. <a href="http://pro.gigaom.com/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=625286+big-open-data-mapr-on-github-and-yelps-dataset-challenge&utm_content=dharrisstructure">Sign up for a free trial</a>.</p><ul><li><a href="http://pro.gigaom.com/2012/03/a-near-term-outlook-for-big-data/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=625286+big-open-data-mapr-on-github-and-yelps-dataset-challenge&utm_content=dharrisstructure">A near-term outlook for big data</a></li><li><a href="http://pro.gigaom.com/2012/11/unlocking-big-datas-potential-with-search/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=625286+big-open-data-mapr-on-github-and-yelps-dataset-challenge&utm_content=dharrisstructure">How search can unlock the power of big data</a></li><li><a href="http://pro.gigaom.com/2012/04/infrastructure-q1-cloud-and-big-data-woo-the-enterprise/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=625286+big-open-data-mapr-on-github-and-yelps-dataset-challenge&utm_content=dharrisstructure">Infrastructure Q1: Cloud and big data woo enterprises</a></li></ul>]]></content:encoded>
			<wfw:commentRss>http://gigaom.com/2013/03/28/big-open-data-mapr-on-github-and-yelps-dataset-challenge/feed/</wfw:commentRss>
		<slash:comments>1</slash:comments>
	
		<media:thumbnail url="http://gigaom2.files.wordpress.com/2013/03/shutterstock_88662181.jpg?w=150" />
		<media:content url="http://gigaom2.files.wordpress.com/2013/03/shutterstock_88662181.jpg?w=150" medium="image">
			<media:title type="html">giving hands</media:title>
		</media:content>

		<media:content url="http://0.gravatar.com/avatar/9e48ffa0913f65c577727457dd63023f?s=96&#38;d=retro&#38;r=PG" medium="image">
			<media:title type="html">dharrisstructure</media:title>
		</media:content>
	</item>
		<item>
		<title>Sector RoadMap: SQL-on-Hadoop platforms in 2013</title>
		<link>http://pro.gigaom.com/report/sql-on-hadoop-roadmap-2013/</link>
		<comments>http://pro.gigaom.com/report/sql-on-hadoop-roadmap-2013/#comments</comments>
		<pubDate>Wed, 20 Mar 2013 12:00:16 +0000</pubDate>
		<dc:creator>Joseph Turian</dc:creator>
				<category><![CDATA[Uncategorized]]></category>
		<category><![CDATA[apache]]></category>
		<category><![CDATA[apache-hive]]></category>
		<category><![CDATA[aster]]></category>
		<category><![CDATA[Aster Big Analytics Appliance]]></category>
		<category><![CDATA[big data]]></category>
		<category><![CDATA[BigInsights]]></category>
		<category><![CDATA[Citus Data]]></category>
		<category><![CDATA[CitusDB]]></category>
		<category><![CDATA[Cloud Computing]]></category>
		<category><![CDATA[Cloudera]]></category>
		<category><![CDATA[Clustrix]]></category>
		<category><![CDATA[Concurrent]]></category>
		<category><![CDATA[Database theory]]></category>
		<category><![CDATA[Dremel]]></category>
		<category><![CDATA[Drill]]></category>
		<category><![CDATA[EMC]]></category>
		<category><![CDATA[Google]]></category>
		<category><![CDATA[Greenplum]]></category>
		<category><![CDATA[Hadapt]]></category>
		<category><![CDATA[Hadoop]]></category>
		<category><![CDATA[Hadoop Distributed File System]]></category>
		<category><![CDATA[HAWQ]]></category>
		<category><![CDATA[Hbase]]></category>
		<category><![CDATA[HCatalog]]></category>
		<category><![CDATA[HDFS]]></category>
		<category><![CDATA[hive]]></category>
		<category><![CDATA[Hortonworks]]></category>
		<category><![CDATA[IBM]]></category>
		<category><![CDATA[Impala]]></category>
		<category><![CDATA[JethroData]]></category>
		<category><![CDATA[karmasphere]]></category>
		<category><![CDATA[Lingual]]></category>
		<category><![CDATA[Mapr]]></category>
		<category><![CDATA[mapreduce]]></category>
		<category><![CDATA[MemSQL]]></category>
		<category><![CDATA[microstrategy]]></category>
		<category><![CDATA[MongoDB]]></category>
		<category><![CDATA[MPP]]></category>
		<category><![CDATA[NewSQL]]></category>
		<category><![CDATA[Optiq]]></category>
		<category><![CDATA[Oracle]]></category>
		<category><![CDATA[parallel computing]]></category>
		<category><![CDATA[pig]]></category>
		<category><![CDATA[Platfora]]></category>
		<category><![CDATA[PostGIS]]></category>
		<category><![CDATA[PostgreSQL]]></category>
		<category><![CDATA[PostreSQL]]></category>
		<category><![CDATA[RainStor]]></category>
		<category><![CDATA[Salesforce.com]]></category>
		<category><![CDATA[SAP]]></category>
		<category><![CDATA[SAP HANA]]></category>
		<category><![CDATA[Splice Machine]]></category>
		<category><![CDATA[SQL]]></category>
		<category><![CDATA[SQL 92]]></category>
		<category><![CDATA[SQL-H]]></category>
		<category><![CDATA[SQLStream]]></category>
		<category><![CDATA[Stinger]]></category>
		<category><![CDATA[Stringer]]></category>
		<category><![CDATA[tableau]]></category>
		<category><![CDATA[Teradata]]></category>
		<category><![CDATA[Twitter]]></category>
		<category><![CDATA[VoltDB]]></category>
		<category><![CDATA[zookeeper]]></category>

		<guid isPermaLink="false">http://pro.gigaom.com/?post_type=go-report&#038;p=171512/</guid>
		<description><![CDATA[Today’s most successful companies are the ones with the ability to capture and analyze all data available to them. Enter SQL-on-Hadoop solutions, which increase the accessibility of Hadoop and allow organizations to reuse their investment learning in SQL. <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=648564&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
				<content:encoded><![CDATA[<p>Today’s most successful companies are the ones with the ability to capture and analyze all data available to them. Enter SQL-on-Hadoop solutions, which increase the accessibility of Hadoop and allow organizations to reuse their investment learning in SQL. </p>
<br />  <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=648564&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" /><p><a href="http://pubads.g.doubleclick.net/gampad/jump?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=42038"><img src="http://pubads.g.doubleclick.net/gampad/ad?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=42038" /></a></p><p><strong>Related research and analysis from GigaOM Pro:</strong><br />Subscriber content. <a href="http://pro.gigaom.com/?utm_medium=editorial&utm_campaign=auto3&utm_term=648564+sql-on-hadoop-roadmap-2013&utm_content=gigaedit">Sign up for a free trial</a>.</p><ul><li><a href="http://pro.gigaom.com/2012/03/a-near-term-outlook-for-big-data/?utm_medium=editorial&utm_campaign=auto3&utm_term=648564+sql-on-hadoop-roadmap-2013&utm_content=gigaedit">A near-term outlook for big data</a></li><li><a href="http://pro.gigaom.com/2012/04/sector-roadmap-hadoop-platforms-2012/?utm_medium=editorial&utm_campaign=auto3&utm_term=648564+sql-on-hadoop-roadmap-2013&utm_content=gigaedit">2012: The Hadoop infrastructure market booms</a></li><li><a href="http://pro.gigaom.com/2011/03/defining-hadoop-the-players-technologies-and-challenges-of-2011/?utm_medium=editorial&utm_campaign=auto3&utm_term=648564+sql-on-hadoop-roadmap-2013&utm_content=gigaedit">Defining Hadoop: the Players, Technologies and Challenges of 2011</a></li></ul>]]></content:encoded>
			<wfw:commentRss>http://pro.gigaom.com/report/sql-on-hadoop-roadmap-2013/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:thumbnail url="https://gigaom-pro-files.s3.amazonaws.com/files/2012/04/elephant.jpg?w=150" />
		<media:content url="https://gigaom-pro-files.s3.amazonaws.com/files/2012/04/elephant.jpg?w=150" medium="image">
			<media:title type="html">elephant</media:title>
		</media:content>

		<media:content url="http://1.gravatar.com/avatar/4f3860069d181dbeeb398304f5940a9e?s=96&#38;d=retro&#38;r=PG" medium="image">
			<media:title type="html">gigaedit</media:title>
		</media:content>
	</item>
		<item>
		<title>In battle for Hadoop, MapR raises $30M</title>
		<link>http://gigaom.com/2013/03/18/in-battle-for-hadoop-mapr-raises-30m/</link>
		<comments>http://gigaom.com/2013/03/18/in-battle-for-hadoop-mapr-raises-30m/#comments</comments>
		<pubDate>Mon, 18 Mar 2013 22:30:50 +0000</pubDate>
		<dc:creator>Derrick Harris</dc:creator>
				<category><![CDATA[big data]]></category>
		<category><![CDATA[Hadoop]]></category>
		<category><![CDATA[Hbase]]></category>
		<category><![CDATA[Mapr]]></category>
		<category><![CDATA[open source]]></category>

		<guid isPermaLink="false">http://gigaom.com/?p=621549</guid>
		<description><![CDATA[Hadoop vendor MapR is racking up customers and on Monday it announced a $30 million venture-capital investment that brings its total funding to $59 million since launching in 2011.<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=621549&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
				<content:encoded><![CDATA[<p>There’s a lot of positioning within the Hadoop community over who has the most contributors to Apache Hadoop and whose distribution is the most open source. Depending on the source, MapR <a href="http://gigaom.com/2012/02/27/hadoop-bigger-than-spring-jboss-and-mysql-combined/">might be singled out as the antithesis of what Hadoop should be</a>. But MapR doesn’t mind the digs: <a href="http://www.mapr.com/">The company</a> is racking up customers and just closed a $30 million venture-capital investment that brings its total funding to $59 million since launching in 2011.</p>
<p>Because <a href="http://gigaom.com/2013/03/04/the-history-of-hadoop-from-4-nodes-to-the-future-of-data/">its roots are as an open-source project</a>, some members of the Hadoop community are rightfully concerned about keeping it as open as possible. This gives customers more flexibility in moving from product to product, they argue, and could help prevent a technological splinter like what happened with Unix in the 1980s and significantly slowed the popular operating system’s uptake and rise to ubiquity.</p>
<div id="attachment_621723" class="wp-caption alignright" style="width: 310px"><a href="http://gigaom2.files.wordpress.com/2013/03/mapr_control_system2.png"><img alt="MapR's feature list" src="http://gigaom2.files.wordpress.com/2013/03/mapr_control_system2.png?w=300&#038;h=300" width="300" height="300" class="size-medium wp-image-621723"></a><p class="wp-caption-text">MapR’s feature list</p></div>
<p>MapR catches some flak because it has made its name pushing a pair of Hadoop distributions (one free and one not) that are based on the company’s proprietary file system that it claims is significantly faster than the standard Hadoop Distributed File System that many of its competitors use. Last year, it announced a commercial version of the usually HDFS-based HBase database, currently in beta, that also includes many of MapR’s homegrown improvements around performance and reliability.</p>
<p>Although, according to MapR VP of Marketing Jack Norris, the criticisms of its semi-proprietary approach aren’t entirely fair. He told me during a recent call that there are more than a dozen open-source packages within the company’s Hadoop distribution, and noted that allowing data access via <a href="http://en.wikipedia.org/wiki/Network_File_System">NFS</a> is hardly a tool of vendor lock-in.</p>
<p>The company is also <a href="http://gigaom.com/2012/08/17/for-fast-interactive-hadoop-queries-drill-may-be-the-answer/">spearheading the Apache Drill project</a>, an open-source re-envisioning of <a href="http://gigaom.com/2013/03/14/google-bigquery-is-now-even-bigger/">Google’s Dremel</a> for SQL-like queries on Hadoop data. Tomer Shiran, MapR’s director of product management, will be discussing the project during a panel at our <a href="http://event.gigaom.com/structuredata/?utm_source=data&amp;utm_medium=editorial&amp;utm_campaign=intext&amp;utm_term=621549+in-battle-for-hadoop-mapr-raises-30m&amp;utm_content=dharrisstructure">Structure: Data</a> conference this week in New York.</p>
<p>But at the end of the day, MapR is a business and it’s doing what it can to make money in the new world of big data. If customers want features they can’t get from open-source versions of Hadoop, MapR will gladly supply them. In fact, he said, open source is “really not a core issue that comes up during the sales cycle.” (Norris took a more-defensive tone in a discussion about this topic last year: “No one can name the top 5 or 10 engineers on Oracle’s database,” he told me, “and no one really cares.”)</p>
<p>Norris points to <a href="http://blogs.gartner.com/merv-adrian/2013/03/09/open-source-purity-hadoop-and-market-realities/">a recent blog post from Gartner analyst Merv Adrian</a> in defending his company’s position. Addressing the concern over open source and Hadoop — particularly as it relates to MapR and <a href="http://gigaom.com/2013/02/25/emc-to-hadoop-competition-see-ya-wouldnt-wanna-be-ya/">former OEM partner EMC</a> — Adrian wrote: “Having some components of your solution stack provided by the open source community is a fact of life and a benefit for all. So are roads, but nobody accuses Fedex or your pizza delivery guy of being evil for using them without contributing some asphalt.”</p>
<p>But MapR could just as easily point to its customer list and partnerships to prove the effectiveness of its approach, at least. Norris said its customers in fields such as advertising and retail analyze data on more than 90 percent of the internet population monthly and more than a trillion dollars in transactions every year. (It’s pretty mum on naming customers, although Norris did cite ComScore and Ancestry.com as users.) <a href="http://gigaom.com/2012/06/13/amazon-taps-mapr-for-high-powered-elastic-mapreduce/">Both Amazon Web Services</a> and <a href="http://www.mapr.com/company/press-releases/google-compute-engine-and-mapr-technologies-crush-minutesort-record">Google have partnered</a> with MapR to boost Hadoop performance on their cloud platforms.</p>
<p>Still, Hadoop is still relatively young as a commercial technology and it’s very early on for Hadoop as an IT market all its own. What customers like now might not be what they like forever, and there’s plenty of competition for those workloads and dollars. When you look at its bigger, better-funded and better-known competitors such as <a href="http://gigaom.com/2012/12/06/cloudera-snares-big-65m-more-to-boost-international-enterprise-growth/">Cloudera</a>, Hortonworks, <a href="http://gigaom.com/2013/02/25/emc-to-hadoop-competition-see-ya-wouldnt-wanna-be-ya/">EMC Greenplum</a> and <a href="http://gigaom.com/2013/02/26/cloudera-who-intel-announces-its-own-hadoop-distribution/">now Intel</a>, it’s easy to see just how tough a fight MapR has in front of it.</p>
<p>Norris isn’t sweating it, though. “The big major weakness that needs to be addressed [with Hadoop] is the dynamic read/write capability of HDFS,” he told me. As long as the other players keep relying on HDFS at the storage layer, MapR will at least have a strong point of differentiation.</p>
<p>Mayfield Fund led MapR’s latest investment round, and existing investors Lightspeed Venture Partners, NEA and Redpoint Ventures also participated.</p>
<br />  <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=621549&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" /><p><a href="http://pubads.g.doubleclick.net/gampad/jump?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=275253"><img src="http://pubads.g.doubleclick.net/gampad/ad?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=275253" /></a></p><p><strong>Related research and analysis from GigaOM Pro:</strong><br />Subscriber content. <a href="http://pro.gigaom.com/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=621549+in-battle-for-hadoop-mapr-raises-30m&utm_content=dharrisstructure">Sign up for a free trial</a>.</p><ul><li><a href="http://pro.gigaom.com/2012/11/unlocking-big-datas-potential-with-search/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=621549+in-battle-for-hadoop-mapr-raises-30m&utm_content=dharrisstructure">How search can unlock the power of big data</a></li><li><a href="http://pro.gigaom.com/2012/07/scaling-hadoop-clusters-the-role-of-cluster-management/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=621549+in-battle-for-hadoop-mapr-raises-30m&utm_content=dharrisstructure">Scaling Hadoop clusters: the role of cluster management</a></li><li><a href="http://pro.gigaom.com/2012/04/infrastructure-q1-cloud-and-big-data-woo-the-enterprise/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=621549+in-battle-for-hadoop-mapr-raises-30m&utm_content=dharrisstructure">Infrastructure Q1: Cloud and big data woo enterprises</a></li></ul>]]></content:encoded>
			<wfw:commentRss>http://gigaom.com/2013/03/18/in-battle-for-hadoop-mapr-raises-30m/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:thumbnail url="http://gigaom2.files.wordpress.com/2012/10/shutterstock_70904386.jpg?w=150" />
		<media:content url="http://gigaom2.files.wordpress.com/2012/10/shutterstock_70904386.jpg?w=150" medium="image">
			<media:title type="html">Fighting elephants</media:title>
		</media:content>

		<media:content url="http://0.gravatar.com/avatar/9e48ffa0913f65c577727457dd63023f?s=96&#38;d=retro&#38;r=PG" medium="image">
			<media:title type="html">dharrisstructure</media:title>
		</media:content>

		<media:content url="http://gigaom2.files.wordpress.com/2013/03/mapr_control_system2.png?w=300" medium="image">
			<media:title type="html">MapR&#039;s feature list</media:title>
		</media:content>
	</item>
		<item>
		<title>This week&#8217;s 10 best data stories (so far)</title>
		<link>http://gigaom.com/2013/02/27/this-weeks-10-best-data-stories-so-far/</link>
		<comments>http://gigaom.com/2013/02/27/this-weeks-10-best-data-stories-so-far/#comments</comments>
		<pubDate>Thu, 28 Feb 2013 00:44:29 +0000</pubDate>
		<dc:creator>Derrick Harris</dc:creator>
				<category><![CDATA[artificial intelligence]]></category>
		<category><![CDATA[big data]]></category>
		<category><![CDATA[Continuuity]]></category>
		<category><![CDATA[emc-greenplum]]></category>
		<category><![CDATA[Google]]></category>
		<category><![CDATA[Hadoop]]></category>
		<category><![CDATA[IBM]]></category>
		<category><![CDATA[Intel]]></category>
		<category><![CDATA[LinkedIn]]></category>
		<category><![CDATA[Mapr]]></category>
		<category><![CDATA[Metamarkets]]></category>
		<category><![CDATA[open source]]></category>
		<category><![CDATA[Placed]]></category>
		<category><![CDATA[virtustream]]></category>

		<guid isPermaLink="false">http://gigaom.com/?p=615075</guid>
		<description><![CDATA[There has been a lot of data news already this week -- some big, some interesting, and some both. Here's a collection of the stuff you shouldn't, or don't want to, miss.<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=615075&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
				<content:encoded><![CDATA[<p>It has been a busy week for data news already, so here are 10 of the big and/or interesting items you might have missed if you blinked:</p>
<ul><li><a href="http://gigaom.com/2013/02/25/emc-to-hadoop-competition-see-ya-wouldnt-wanna-be-ya/"><img alt="hawq" src="http://gigaom2.files.wordpress.com/2013/02/hawq1.jpg?w=300&#038;h=164" width="300" height="164" class="alignright size-medium wp-image-615210"><strong>EMC Greenplum lays down the SQL-on-Hadoop gauntlet</strong></a><strong>: </strong>The company’s new Pivotal HD Hadoop distribution fuses its analytic database technology with Hadoop to create a single data store for everything. Greenplum co-founder Scott Yara claims the data warehouse — where Greenplum got its start — is the new mainframe.</li>
<li><strong><a href="http://gigaom.com/2013/02/26/cloudera-who-intel-announces-its-own-hadoop-distribution/">Intel does Hadoop</a>: </strong>Intel’s Hadoop distribution is interesting for so many reasons, but the biggest might be the sense that it’s an attempt to keep x86 relevant as ARM pushers pursue big data workloads. Among Intel’s hardware partners are Cray, SuperMicro and Cisco.</li>
</ul><ul><li><strong><a href="http://www.technologyreview.com/view/511846/an-autopsy-of-a-dead-social-network/"><img alt="friendster" src="http://gigaom2.files.wordpress.com/2013/02/friendster.jpg?w=300&#038;h=171" width="300" height="171" class="alignright size-medium wp-image-615212">How Friendster died and Facebook might die</a>: </strong>Researchers studied the collapse of Friendster and decided that a diminished cost-benefit analysis and users’ average number of friends contributed to its demise. The fewer friends, the more influential one friend’s decision to quit. And people quit when services begin to suck.</li>
<li><strong><a href="http://ekvv.uni-bielefeld.de/blog/uninews/entry/blueprint_for_an_artificial_brain">Using memristors to recreate the brain</a>: </strong>This is a heady research project based on the theory that memristors are similar enough to synapses in the human brain that they could help create an artificial brain. Memristors are a nanotechnology that allow electrical currents to pass between circuits based on the past currents they have  transmitted.</li>
<li><strong><a href="http://www.mapr.com/company/press-releases/google-compute-engine-and-mapr-technologies-crush-minutesort-record">MapR and Google in a high-performance lovefest</a>: </strong>MapR is all about faster Hadoop, and Google is all about touting how great its Compute Engine cloud is for high-performance jobs. A MinuteSort benchmark test of MapR on Compute Engine bested the previous record (and crushed the previous Hadoop record for MinuteSort) — and on standard cloud servers, no less.<strong></strong></li>
<li><strong><a href="http://engineering.linkedin.com/data-replication/open-sourcing-databus-linkedins-low-latency-change-data-capture-system">LinkedIn open sources Databus</a>: </strong>Databus is LinkedIn’s tool for updating changes in data between its various storage systems and applications at high speed. It could be pretty valuable, and I assume it’s something LinkedIn’s Bhaskar Ghosh will discuss <a href="http://event.gigaom.com/structuredata/?utm_source=data&amp;utm_medium=editorial&amp;utm_campaign=intext&amp;utm_term=615075+this-weeks-10-best-data-stories-so-far&amp;utm_content=dharrisstructure">during our guru panel at Structure: Data</a> next month.</li>
</ul><p><img style="font-size:13px;" alt="databus-usecases" src="http://gigaom2.files.wordpress.com/2013/02/databus-usecases.jpg?w=708&#038;h=243" width="708" height="243" class="aligncenter size-large wp-image-615206"></p>
<ul><li><a href="http://www.continuuity.com/news/continuuity-unveils-free-big-data-application-paas"><strong>Continuuity free beta now open to the public</strong></a>: Continuuity is the startup from former Yahoo VP Todd Papaioannou and Facebook engineer Jonathan Gray that’s <a href="http://gigaom.com/2012/10/23/ex-yahoo-facebook-big-data-vets-launch-paas-for-hadoop/">building a platform as a service for developing big data applications</a>. On Wednesday, it opened a beta version to developers who want to test the experience of building Hadoop applications on the cloud-based platform.</li>
<li><a href="http://www.placed.com/press/aisle-to-amazon-showrooming-retail-impact"><img alt="Showrooming-retailer-risk-403ac501feb3773215b42f9a148671de" src="http://gigaom2.files.wordpress.com/2013/02/showrooming-retailer-risk-403ac501feb3773215b42f9a148671de.png?w=300&#038;h=230" width="300" height="230" class="alignright size-medium wp-image-615204"></a><a href="http://www.placed.com/press/aisle-to-amazon-showrooming-retail-impact"><strong>Placed Analytics shows who shops in stores but buys online</strong></a>: This is the latest piece of research from Placed, <a href="http://gigaom.com/2012/07/09/how-placed-wants-map-mobile-app-usage-down-to-the-store/">a startup tracking mobile phone data</a> to determine what businesses people like to visit, or at least hang out near. This report highlights which businesses are most at risk from consumers viewing products in their stores and then buying them on Amazon.</li>
<li><a href="http://www-03.ibm.com/press/us/en/pressrelease/40411.wss"><strong>IBM, South Korea and weather predictions</strong></a>: Weather forecasting has always been a good area for big data and high-performance computing, so this use case is pretty much straight data porn. From the press release: “IBM has provided KMA and NMSC with the latest IBM storage technologies capable of recording 20 gigabytes (equivalent to 400,000 web pages) of data per second … [w]ith a total storage capacity of 9.3 petabytes.”</li>
<li><a href="http://www.virtustream.com/content/virtustream_skilled_analysts_offer_enterprise_big_data_cloud_solutions"><strong>Virtustream using Druid for cloud analytics service</strong></a>: Virtustream is dead serious about staking its claim as the enterprise cloud provider, and this partnership with Metamarkets (see disclosure) is a good way to expand its reach into big data applications. Essentially, Metamarkets will provide consulting services for companies wanting to build apps atop Hadoop and <a href="http://gigaom.com/2012/10/24/metamarkets-open-sources-druid-its-in-memory-database/">Druid</a>, the in-memory analytic database that Metamarkets created.</li>
</ul><p>In addition to LinkedIn’s Ghosh, the founders of Placed, Continuuity and Metamarkets will all be on stage at <a href="http://event.gigaom.com/structuredata/?utm_source=data&amp;utm_medium=editorial&amp;utm_campaign=intext&amp;utm_term=615075+this-weeks-10-best-data-stories-so-far&amp;utm_content=dharrisstructure">Structure: Data</a> talking about everything from building Hadoop applications, to managing massive data infrastructure to the new era of web privacy, so please come and watch.</p>
<p><em><strong>Disclosure:</strong> Metamarkets is a portfolio company of True Ventures, which is also an investor in GigaOM. Om Malik is also a venture partner at True.</em></p>
<br />  <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=615075&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" /><p><a href="http://pubads.g.doubleclick.net/gampad/jump?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=100314"><img src="http://pubads.g.doubleclick.net/gampad/ad?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=100314" /></a></p><p><strong>Related research and analysis from GigaOM Pro:</strong><br />Subscriber content. <a href="http://pro.gigaom.com/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=615075+this-weeks-10-best-data-stories-so-far&utm_content=dharrisstructure">Sign up for a free trial</a>.</p><ul><li><a href="http://pro.gigaom.com/2012/07/cloud-and-data-second-quarter-2012-analysis-and-outlook-2/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=615075+this-weeks-10-best-data-stories-so-far&utm_content=dharrisstructure">Takeaways from the second quarter in cloud and data</a></li><li><a href="http://pro.gigaom.com/2012/11/unlocking-big-datas-potential-with-search/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=615075+this-weeks-10-best-data-stories-so-far&utm_content=dharrisstructure">How search can unlock the power of big data</a></li><li><a href="http://pro.gigaom.com/2012/04/infrastructure-q1-cloud-and-big-data-woo-the-enterprise/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=615075+this-weeks-10-best-data-stories-so-far&utm_content=dharrisstructure">Infrastructure Q1: Cloud and big data woo enterprises</a></li></ul>]]></content:encoded>
			<wfw:commentRss>http://gigaom.com/2013/02/27/this-weeks-10-best-data-stories-so-far/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:thumbnail url="http://gigaom2.files.wordpress.com/2013/02/databus-usecases1-e1362010967340.jpg?w=150" />
		<media:content url="http://gigaom2.files.wordpress.com/2013/02/databus-usecases1-e1362010967340.jpg?w=150" medium="image">
			<media:title type="html">databus-usecases</media:title>
		</media:content>

		<media:content url="http://0.gravatar.com/avatar/9e48ffa0913f65c577727457dd63023f?s=96&#38;d=retro&#38;r=PG" medium="image">
			<media:title type="html">dharrisstructure</media:title>
		</media:content>

		<media:content url="http://gigaom2.files.wordpress.com/2013/02/hawq1.jpg?w=300" medium="image">
			<media:title type="html">hawq</media:title>
		</media:content>

		<media:content url="http://gigaom2.files.wordpress.com/2013/02/friendster.jpg?w=300" medium="image">
			<media:title type="html">friendster</media:title>
		</media:content>

		<media:content url="http://gigaom2.files.wordpress.com/2013/02/databus-usecases.jpg?w=708" medium="image">
			<media:title type="html">databus-usecases</media:title>
		</media:content>

		<media:content url="http://gigaom2.files.wordpress.com/2013/02/showrooming-retailer-risk-403ac501feb3773215b42f9a148671de.png?w=300" medium="image">
			<media:title type="html">Showrooming-retailer-risk-403ac501feb3773215b42f9a148671de</media:title>
		</media:content>
	</item>
		<item>
		<title>Cloudera who? Intel announces its own Hadoop distribution</title>
		<link>http://gigaom.com/2013/02/26/cloudera-who-intel-announces-its-own-hadoop-distribution/</link>
		<comments>http://gigaom.com/2013/02/26/cloudera-who-intel-announces-its-own-hadoop-distribution/#comments</comments>
		<pubDate>Tue, 26 Feb 2013 18:26:31 +0000</pubDate>
		<dc:creator>Stacey Higginbotham</dc:creator>
				<category><![CDATA[Cisco]]></category>
		<category><![CDATA[Cloudera]]></category>
		<category><![CDATA[data]]></category>
		<category><![CDATA[Hadoop]]></category>
		<category><![CDATA[Hortonworks]]></category>
		<category><![CDATA[Intel]]></category>
		<category><![CDATA[Mapr]]></category>

		<guid isPermaLink="false">http://gigaom.com/?p=614504</guid>
		<description><![CDATA[Intel's getting into the open source software business with its own version of Hadoop. It joins a host of startups as well as EMC Greenplum in building a distribution for big data.<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=614504&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
				<content:encoded><![CDATA[<p>Intel on Tuesday said it was getting into the software business with its own Hadoop distribution. The move is a potential blow for startups such as Cloudera, Hortonworks and MapR that are offering their own distributions of Hadoop, but it’s also an admission by the chip vendor that the opportunity in big data isn’t only to be found in selling hardware.</p>
<p>In a conference held in San Francisco, VP and General Manager of Intel’s Datacenter Software Division Boyd Davis explained Intel’s history in Hadoop that stretches back to 2009 and stressed that Intel is going to share some aspects of its Hadoop distribution, but not all. Intel has a distribution of Hadoop it has released in China, but today it’s bringing it to the United States. Intel’s version of the Hadoop distribution uses Hadoop 2.0 and YARN, which is a cutting-edge version of the platform compared with what most Hadoop users have deployed thus far.</p>
<h2 id="why-intel-wants-to-push-its-ow">Why Intel wants to push its own version of Hadoop</h2>
<p><a href="http://gigaom2.files.wordpress.com/2013/02/intelhadoophistory.jpg"><img alt="intelhadoophistory" src="http://gigaom2.files.wordpress.com/2013/02/intelhadoophistory.jpg?w=708&#038;h=400" width="708" height="400" class="aligncenter size-full wp-image-614518"></a></p>
<p>Boyd introduced partners such as and Cisco, which has tuned the Intel Hadoop distribution for its own servers. Intel also hosted a panel that included executives from SAP, Red Hat and Savvis to discuss the challenges of big data and the promise of Hadoop and big data.</p>
<p>Davis was up front about Intel’s rationale for releasing its own distribution, namely that it was worried about the fragmentation and possible uncertainty associated with current Hadoop distributions. That could be read as a dig against the many startups already offering Hadoop distributions, all of which are slightly different (of course, Intel’s will be slightly different, too). Like all of the existing players such as Cloudera and MapR, Intel will open source certain aspects of its distribution, but will also keep software to itself.</p>
<h2 id="inside-the-data-center-its-no-">Inside the data center, it’s no longer just web servers that matter</h2>
<p>For example, Davis stressed that Intel will not share its management and monitoring software, which could be highly valuable for enterprise customers. The Intel software could coordinate with Intel’s data center management software and make managing a variety of workloads easier. And hidden in that coordination might be one of Intel’s aims in pushing its own version of Hadoop — the threat of ARM chips used in Hadoop clusters.</p>
<p>Dell, Calxeda and others are evaluating the use of lower-performance, <a href="http://gigaom.com/2012/10/24/dell-wants-to-tune-big-data-apps-for-arm-servers/">lower-power chips in Hadoop clusters</a>, a market <a href="http://gigaom.com/2011/06/13/big-data-on-micro-servers-you-bet/">Intel would hate to cede in the data center</a> as data grows and analytics becomes more important. To that end, Intel has also optimized its Hadoop distribution for solid-state drives, something that other Hadoop companies haven’t done so far.</p>
<p>When asked about Atom and the use of lower-performance processors for Hadoop, Davis noted that while people are using lower-end processors for Hadoop, those uses tend to have slower networking. Davis says that when you combine high-end processors with 10 gigabit Ethernet and Hadoop, customers get the performance that they want.</p>
<p><a href="http://gigaom2.files.wordpress.com/2013/02/intelhadoop.jpg"><img alt="intelhadoop" src="http://gigaom2.files.wordpress.com/2013/02/intelhadoop.jpg?w=708&#038;h=397" width="708" height="397" class="aligncenter size-full wp-image-614552"></a></p>
<p>So while Intel may tout stability and consistency as the reason for its decision to become a major player in the software market for big data, it’s also driven by the changes in the data center that threaten the grip Intel has on the hardware inside the data center. The cloud and big data have changed the workloads and hardware requirements for the data center and Intel is playing the long game in trying to release software that can be tuned to its chips.</p>
<h2 id="the-hadoop-drama-isnt-over-yet">The Hadoop drama isn’t over yet</h2>
<p>Intel isn’t the only big vendor touting its own homegrown version of Hadoop. On Monday, <a href="http://gigaom.com/2013/02/25/emc-to-hadoop-competition-see-ya-wouldnt-wanna-be-ya/">EMC’s Greenplum division announced an entirely revamped version</a> of its Hadoop distribution that’s merged with its flagship analytic SQL database. These big companies have big existing businesses to protect and lots of resources to put into doing it. As my colleague Derrick Harris wrote on the EMC news:</p>
<blockquote id="quote-looking-past-his-com"><p>Looking past his competitive boasting, though, it’s easy to see [Greenplum's Scott] Yara’s greater point when you ask him what all this Hadoop talks means for the data warehouse business on which Greenplum was built. He points to the mainframe business that fell from its high perch decades ago but still drives billions a year in revenue. A single MPP database system is still faster on certain workloads than SQL on Hadoop, but that gap will close over time and “I do think the center of gravity will move toward HDFS,” he said.</p></blockquote>
<p>Hadoop is a juggernaut when it comes to big data. Intel is a juggernaut when it comes to data center infrastructure. Its decision to enter into the open source software market is a big one for the chip company, for the Hadoop ecosystem and for the myriad startups playing in this space. It’s a topic we’ll explore more during our <a href="http://event.gigaom.com/structuredata/?utm_source=data&amp;utm_medium=editorial&amp;utm_campaign=intext&amp;utm_term=614504+cloudera-who-intel-announces-its-own-hadoop-distribution&amp;utm_content=shigginbotham">Structure Data conference in New York on March 20 and 21</a>.</p>
<br />  <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=614504&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" /><p><a href="http://pubads.g.doubleclick.net/gampad/jump?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=395914"><img src="http://pubads.g.doubleclick.net/gampad/ad?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=395914" /></a></p><p><strong>Related research and analysis from GigaOM Pro:</strong><br />Subscriber content. <a href="http://pro.gigaom.com/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=614504+cloudera-who-intel-announces-its-own-hadoop-distribution&utm_content=shigginbotham">Sign up for a free trial</a>.</p><ul><li><a href="http://pro.gigaom.com/2012/07/cloud-and-data-second-quarter-2012-analysis-and-outlook-2/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=614504+cloudera-who-intel-announces-its-own-hadoop-distribution&utm_content=shigginbotham">Takeaways from the second quarter in cloud and data</a></li><li><a href="http://pro.gigaom.com/2011/04/infrastructure-q1-iaas-comes-down-to-earth-big-data-takes-flight/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=614504+cloudera-who-intel-announces-its-own-hadoop-distribution&utm_content=shigginbotham">Infrastructure Q1: IaaS Comes Down to Earth; Big Data Takes Flight</a></li><li><a href="http://pro.gigaom.com/2012/03/why-service-providers-matter-for-the-future-of-big-data/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=614504+cloudera-who-intel-announces-its-own-hadoop-distribution&utm_content=shigginbotham">Why service providers matter for the future of big data</a></li></ul>]]></content:encoded>
			<wfw:commentRss>http://gigaom.com/2013/02/26/cloudera-who-intel-announces-its-own-hadoop-distribution/feed/</wfw:commentRss>
		<slash:comments>3</slash:comments>
	
		<media:thumbnail url="http://gigaom2.files.wordpress.com/2013/02/hadoop1-210x140.jpg?w=150" />
		<media:content url="http://gigaom2.files.wordpress.com/2013/02/hadoop1-210x140.jpg?w=150" medium="image">
			<media:title type="html">hadoop1-210x140</media:title>
		</media:content>

		<media:content url="http://1.gravatar.com/avatar/aee37121e18bf76bb9fee4494bab237a?s=96&#38;d=retro&#38;r=PG" medium="image">
			<media:title type="html">shigginbotham</media:title>
		</media:content>

		<media:content url="http://gigaom2.files.wordpress.com/2013/02/intelhadoophistory.jpg" medium="image">
			<media:title type="html">intelhadoophistory</media:title>
		</media:content>

		<media:content url="http://gigaom2.files.wordpress.com/2013/02/intelhadoop.jpg" medium="image">
			<media:title type="html">intelhadoop</media:title>
		</media:content>
	</item>
		<item>
		<title>Bringing big data to bear on big security</title>
		<link>http://gigaom.com/2013/01/30/bringing-big-data-to-bear-on-big-security/</link>
		<comments>http://gigaom.com/2013/01/30/bringing-big-data-to-bear-on-big-security/#comments</comments>
		<pubDate>Wed, 30 Jan 2013 21:11:14 +0000</pubDate>
		<dc:creator>Barb Darrow</dc:creator>
				<category><![CDATA[Amit Yoran]]></category>
		<category><![CDATA[Art Coviello]]></category>
		<category><![CDATA[big data]]></category>
		<category><![CDATA[IBM]]></category>
		<category><![CDATA[Janet Napolitano]]></category>
		<category><![CDATA[Jon Oltsik]]></category>
		<category><![CDATA[Judith Hurwitz]]></category>
		<category><![CDATA[Mapr]]></category>
		<category><![CDATA[Paul Stamp]]></category>
		<category><![CDATA[RSA]]></category>
		<category><![CDATA[SAIC]]></category>
		<category><![CDATA[Sandy Bird]]></category>

		<guid isPermaLink="false">http://gigaom.com/?p=605719</guid>
		<description><![CDATA[Vendors from tiny startups to giants like RSA, IBM and Cisco are rushing to incorporate big data expertise to beef up IT security.<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=605719&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
				<content:encoded><![CDATA[<p>If you weren’t paranoid about data security before, you probably are now after a week’s worth of  headlines:</p>
<ul><li><a href="http://finance.yahoo.com/news/u-homeland-chief-cyber-9-215436416.html?desktop_view_default=true">Cyber 9-11 threat could be imminent</a> says Homeland Security Secretary Janet Napolitano.</li>
<li><a href="http://finance.yahoo.com/news/u-homeland-chief-cyber-9-215436416.html?desktop_view_default=true">The Pentagon will</a> <a href="http://www.washingtonpost.com/world/national-security/pentagon-to-boost-cybersecurity-force/2013/01/19/d87d9dc2-5fec-11e2-b05a-605528f6b712_story.html">quintuple its cybersecurity forces</a> in response to fears of a <a href="http://www.fastcompany.com/3005120/pentagon-moves-prevent-cyber-pearl-harbor">“Cyber Pearl Harbor.”</a></li>
<li>Your own <a href="http://www.darkreading.com/vulnerability-management/167901026/security/vulnerabilities/240147276/millions-of-networked-devices-in-harm-s-way.html">Linksys or D-link router</a> may have betrayed you to hackers.</li>
</ul><p>It’s enough to make you unplug, if that were an option.</p>
<p>But big security vendors are scrambling to meet the threat (and take advantage of these headlines) by touting how they are incorporating big data smarts into their latest offerings. RSA on Wednesday announced <a href="http://www.emc.com/security/security-analytics/security-analytics.htm">RSA Security Analytics</a> — which brings together the company’s <a href="http://www.emc.com/about/news/press/2011/20110404-01.htm">NetWitness</a> forensics and Envision log management and the <a href="http://gigaom.com/2011/05/25/startup-mapr-underpins-emcs-hadoop-effort/">GreenPlum MapR distribution</a>, according to Amit Yoran, SVP and GM of RSA’s Security Management &amp; Compliance Business.</p>
<div id="attachment_605883" class="wp-caption alignleft" style="width: 267px"><a href="http://gigaom.com/2013/01/30/bringing-big-data-to-bear-on-big-security/amit-yoran_netwitness/" rel="attachment wp-att-605883"><img alt="Amit Yoran, SVP and GM of RSA's Security Management &amp; Compliance Business Unit." src="http://gigaom2.files.wordpress.com/2013/01/amit-yoran_netwitness.jpg?w=257&#038;h=300" width="257" height="300" class="size-medium wp-image-605883"></a><p class="wp-caption-text">Amit Yoran, SVP and GM of RSA’s Security Management &amp; Compliance Business Unit.</p></div>
<p>IBM’s on board with <a href="http://www.securityweek.com/ibm-taps-security-intelligence-and-big-data-detect-hidden-threats">IBM Security Intelligence with Big Data</a>, that ties together IBM Security QRadar SIEM and Big Data Platform among other things. Sandy Bird, CTO of IBM’s security systems division told <a href="http://blogs.wsj.com/cio/2013/01/29/ibm-security-tool-can-flag-disgruntled-employees/">the<em> Wall Street Journal</em></a> that this software can help “CIOs detect internal and external security threats in new ways—and can even scan email and social media to flag apparently ‘disgruntled’ employees who might be inclined to reveal company secrets.”</p>
<p>And <a href="https://github.com/packetloop/packetpig">Cisco Systems is buying Cognitive Security,</a> a privately-held network security company that uses artificial intelligence to detect advanced cyber threats, according to the Cisco blog announcing the acquisition.</p>
<h2 id="big-opportunity-crowded-market">Big opportunity, crowded market</h2>
<p>Big data security analytics, says Jon Oltsik, senior principal analyst with <a href="http://www.esg-global.com/author/jon-oltsik/">Enterprise Strategy Group</a>, is obviously a hot market. Other contenders include <a href="http://www.saic.com/">SAIC </a>and startups like RedLambda, PacketLoop, ZettaSet, Sumo Logic and  Palantir <del>Palentir</del>. The <a href="https://github.com/packetloop/packetpig">PacketPig</a> open source project focuses on security and analytics, he said.</p>
<p>All these players see a chance to apply big data expertise to tilt the balance of power against cyber villains and because of big data’s ability to handle streams of log data and credentials, it does change the game, making it easier to prioritize threats and examine user behavior to spot anomalies, as GigaOM’s<a href="http://gigaom.com/2012/11/15/6-ways-big-data-is-helping-reinvent-enterprise-security/"> Derrick Harris has reported</a>.</p>
<p>Traditionally, companies found threats by examining server and network logs to tell them if their systems had been accessed. It was a rear-guard, after-the-fact reaction. Proper use of big data analytics can accelerate the process. The idea is to apply analytics to this data as  it flows into the network and watch for patterns. “That lets you spot things that you would not have seen till after the fact before or would have thought was a blip,” said Judith Hurwitz, president of Hurwitz &amp; Associates, an IT consultancy. The upshot is companies can shut threats down faster or possibly even prevent a breach by detecting hacker activity before it gets expensive.</p>
<h2 id="goal-better-faster-info-on-dig">Goal: better, faster info on digital evil doers</h2>
<p>These new offerings promise to give companies a full picture of who’s coming into their network, who’s talking to whom, and spot anomalies or atypical user behavior while it is still actionable.</p>
<p>“If Johnny used five IP addresses and four user IDs and 12 different accounts … the time to detect that activity will go from many hours to a few minutes. This is a simple use case, not sci-fi,” said Paul Stamp, director of product marketing for RSA.</p>
<p>Of course, this is an arms race. The good guys build and deploy cool new technologies, then the bad guys — no fools — use the same types of technology to overcome those defenses.</p>
<p>RSA’s executive chairman Art Coviello conceded as much at a press briefing Tuesday at RSA headquarters in Bedford, Mass. “It’s not about perfect security, it’s all about ratcheting down risk as much as you can.”</p>
<p>To learn more about the opportunities and challenges in the era of big data, check out <a href="http://event.gigaom.com/structuredata/?utm_source=data&amp;utm_medium=editorial&amp;utm_campaign=intext&amp;utm_term=605719+bringing-big-data-to-bear-on-big-security&amp;utm_content=gigabarb">GigaOM’s Structure Data</a> event March 20-21 in New York.</p>
<br />  <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=605719&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" /><p><a href="http://pubads.g.doubleclick.net/gampad/jump?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=625161"><img src="http://pubads.g.doubleclick.net/gampad/ad?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=625161" /></a></p><p><strong>Related research and analysis from GigaOM Pro:</strong><br />Subscriber content. <a href="http://pro.gigaom.com/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=605719+bringing-big-data-to-bear-on-big-security&utm_content=gigabarb">Sign up for a free trial</a>.</p><ul><li><a href="http://pro.gigaom.com/2012/12/big-data-2013-key-trends-and-companies-to-watch/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=605719+bringing-big-data-to-bear-on-big-security&utm_content=gigabarb">Big data 2013: key trends and companies to watch</a></li><li><a href="http://pro.gigaom.com/2012/11/unlocking-big-datas-potential-with-search/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=605719+bringing-big-data-to-bear-on-big-security&utm_content=gigabarb">How search can unlock the power of big data</a></li><li><a href="http://pro.gigaom.com/2012/07/scaling-hadoop-clusters-the-role-of-cluster-management/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=605719+bringing-big-data-to-bear-on-big-security&utm_content=gigabarb">Scaling Hadoop clusters: the role of cluster management</a></li></ul>]]></content:encoded>
			<wfw:commentRss>http://gigaom.com/2013/01/30/bringing-big-data-to-bear-on-big-security/feed/</wfw:commentRss>
		<slash:comments>3</slash:comments>
	
		<media:thumbnail url="http://gigaom2.files.wordpress.com/2012/11/shutterstock_90909827-e1353008050857.jpg?w=150" />
		<media:content url="http://gigaom2.files.wordpress.com/2012/11/shutterstock_90909827-e1353008050857.jpg?w=150" medium="image">
			<media:title type="html">web security</media:title>
		</media:content>

		<media:content url="http://1.gravatar.com/avatar/4af03439988d64f816da72496325cb73?s=96&#38;d=retro&#38;r=PG" medium="image">
			<media:title type="html">gigabarb</media:title>
		</media:content>

		<media:content url="http://gigaom2.files.wordpress.com/2013/01/amit-yoran_netwitness.jpg?w=257" medium="image">
			<media:title type="html">Amit Yoran, SVP and GM of  RSA&#039;s Security Management &#38; Compliance Business Unit .</media:title>
		</media:content>
	</item>
	</channel>
</rss>
