<?xml version="1.0" encoding="UTF-8"?>
<!-- RSS 2.0 root element; one line per extension-module namespace binding. -->
<rss version="2.0"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:geo="http://www.w3.org/2003/01/geo/wgs84_pos#"
	xmlns:georss="http://www.georss.org/georss"
	xmlns:media="http://search.yahoo.com/mrss/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/">

<channel>
	<!-- Channel identity; the atom:link rel="self" entry carries this feed's own URL. -->
	<title>GigaOM » Greenplum</title>
	<atom:link rel="self" type="application/rss+xml" href="http://gigaom.com/tag/greenplum/feed/"/>
	<link>http://gigaom.com</link>
	<description></description>
	<lastBuildDate>Thu, 23 May 2013 15:36:01 +0000</lastBuildDate>
	<language>en</language>

	<!-- Syndication-module refresh hint: updatePeriod "hourly", updateFrequency 1. -->
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>http://wordpress.com/</generator>

	<!-- Update-notification endpoint: http-post to gigaom.com port 80, path /?rsscloud=notify. -->
	<cloud domain="gigaom.com" port="80" path="/?rsscloud=notify" registerProcedure="" protocol="http-post"/>

	<!-- Channel image; its title and link repeat the channel's own values. -->
	<image>
		<url>http://0.gravatar.com/blavatar/0db8f6557d022075dbbf010c54d46d93?s=96&amp;d=http%3A%2F%2Fs2.wp.com%2Fi%2Fbuttonw-com.png</url>
		<title>GigaOM » Greenplum</title>
		<link>http://gigaom.com</link>
	</image>

	<!-- Discovery links: OpenSearch description document and the rel="hub" endpoint. -->
	<atom:link rel="search" type="application/opensearchdescription+xml" href="http://gigaom.com/osd.xml" title="GigaOM"/>
	<atom:link rel="hub" href="http://gigaom.com/?pushpress=hub"/>
		<item>
		<!-- Entry identity: headline, canonical link, comments anchor, publication date, author. -->
		<title>How data warehousing is now a cost-effective solution for businesses</title>
		<link>http://pro.gigaom.com/report/the-new-economics-of-enterprise-data-warehousing/</link>
		<comments>http://pro.gigaom.com/report/the-new-economics-of-enterprise-data-warehousing/#comments</comments>
		<pubDate>Mon, 13 May 2013 06:55:34 +0000</pubDate>
		<dc:creator>nraden</dc:creator>

		<!-- Category labels, in the order the feed emitted them. -->
		<category>Uncategorized</category>
		<category>ADAPA</category>
		<category>analytics</category>
		<category>apache-hadoop</category>
		<category>big data</category>
		<category>Business Intelligence</category>
		<category>clickstream analytics</category>
		<category>Cloud Computing</category>
		<category>cloud-infrastructure</category>
		<category>columnar databases</category>
		<category>data</category>
		<category>data analysis</category>
		<category>data management</category>
		<category>data storage</category>
		<category>data warehouse</category>
		<category>data warehousing</category>
		<category>data-analytics</category>
		<category>database</category>
		<category>database technology</category>
		<category>Database theory</category>
		<category>distributed processing</category>
		<category>EMC</category>
		<category>etl</category>
		<category>extraction transform load systems</category>
		<category>Ferrari</category>
		<category>file systems</category>
		<category>Greenplum</category>
		<category>Hadoop</category>
		<category>HDFS</category>
		<category>Hewlett-Packard</category>
		<category>high-speed technologies</category>
		<category>HP</category>
		<category>IBM</category>
		<category>information technology</category>
		<category>integrated circuit</category>
		<category>Linux</category>
		<category>Mapr</category>
		<category>mobile devices</category>
		<category>Moore's Law</category>
		<category>Netezza</category>
		<category>Open Source Software</category>
		<category>parallel processing</category>
		<category>relational-databases</category>
		<category>scalability</category>
		<category>social networks</category>
		<category>storage devices</category>
		<category>storage virtualization technologies</category>
		<category>System administration</category>
		<category>tco</category>
		<category>total-cost-of-ownership</category>
		<category>Transaction processing</category>
		<category>Truviso</category>
		<category>Vertica</category>
		<category>Virtualization technology</category>

		<!-- isPermaLink="false": the guid is an identifier string, not a link to follow. -->
		<guid isPermaLink="false">http://pro.gigaom.com/?post_type=go-report&amp;p=175747/</guid>
		<description><![CDATA[Data-warehouse providers are quickly adding Hadoop distributions, or even their own versions of Hadoop, into their architecture, adding further cost advantages to collections of extremely large data sets. Finding the talent to manage this newly converged environment will not be easy, but it presents tremendous opportunity for companies willing to take some risk.<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=648494&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
				<content:encoded><![CDATA[<p>The new economics of data warehousing provide attractive alternatives in both costs and benefits. While big data gets most of the attention, evolved data warehousing will play an important role for the foreseeable future. In order to be relevant, data-warehouse design and operation need to be simplified, taking advantage of greatly improved hardware, software, and methods.</p>
<br />  <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=648494&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" /><p><a href="http://pubads.g.doubleclick.net/gampad/jump?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=525586"><img src="http://pubads.g.doubleclick.net/gampad/ad?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=525586" /></a></p><p><strong>Related research and analysis from GigaOM Pro:</strong><br />Subscriber content. <a href="http://pro.gigaom.com/?utm_medium=editorial&utm_campaign=auto3&utm_term=648494+the-new-economics-of-enterprise-data-warehousing&utm_content=nraden">Sign up for a free trial</a>.</p><ul><li><a href="http://pro.gigaom.com/2012/04/infrastructure-q1-cloud-and-big-data-woo-the-enterprise/?utm_medium=editorial&utm_campaign=auto3&utm_term=648494+the-new-economics-of-enterprise-data-warehousing&utm_content=nraden">Infrastructure Q1: Cloud and big data woo enterprises</a></li><li><a href="http://pro.gigaom.com/2012/07/cloud-and-data-second-quarter-2012-analysis-and-outlook-2/?utm_medium=editorial&utm_campaign=auto3&utm_term=648494+the-new-economics-of-enterprise-data-warehousing&utm_content=nraden">Takeaways from the second quarter in cloud and data</a></li><li><a href="http://pro.gigaom.com/2011/04/infrastructure-q1-iaas-comes-down-to-earth-big-data-takes-flight/?utm_medium=editorial&utm_campaign=auto3&utm_term=648494+the-new-economics-of-enterprise-data-warehousing&utm_content=nraden">Infrastructure Q1: IaaS Comes Down to Earth; Big Data Takes Flight</a></li></ul>]]></content:encoded>
			<wfw:commentRss>http://pro.gigaom.com/report/the-new-economics-of-enterprise-data-warehousing/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>

		<!-- Thumbnail, followed by the same image URL as a titled media:content entry. -->
		<media:thumbnail url="https://gigaom-pro-files.s3.amazonaws.com/files/2012/08/datacenter1.jpg?w=150"/>
		<media:content medium="image" url="https://gigaom-pro-files.s3.amazonaws.com/files/2012/08/datacenter1.jpg?w=150">
			<media:title type="html">datacenter1</media:title>
		</media:content>

		<!-- Gravatar image whose title matches this item's dc:creator value. -->
		<media:content medium="image" url="http://0.gravatar.com/avatar/fdbbd80432b14e9d84aa12c6fc0cce24?s=96&amp;d=retro&amp;r=PG">
			<media:title type="html">nraden</media:title>
		</media:content>
	</item>
		<item>
		<!-- Entry identity: headline, canonical link, comments anchor, publication date, author. -->
		<title>How EMC’s CTO is trying to keep EMC, VMware and Pivotal orbiting the same sun</title>
		<link>http://gigaom.com/2013/05/07/how-emcs-cto-is-trying-to-keep-emc-vmware-and-pivotal-orbiting-the-same-sun/</link>
		<comments>http://gigaom.com/2013/05/07/how-emcs-cto-is-trying-to-keep-emc-vmware-and-pivotal-orbiting-the-same-sun/#comments</comments>
		<pubDate>Wed, 08 May 2013 01:17:09 +0000</pubDate>
		<dc:creator>Derrick Harris</dc:creator>

		<!-- Category labels, in the order the feed emitted them. -->
		<category>big data</category>
		<category>EMC</category>
		<category>Greenplum</category>
		<category>Hadoop</category>
		<category>Pivotal</category>
		<category>software-defined data center</category>
		<category>storage</category>
		<category>VMWare</category>

		<!-- isPermaLink="false": the guid is an identifier string, not a link to follow. -->
		<guid isPermaLink="false">http://gigaom.com/?p=643152</guid>
		<description><![CDATA[EMC CTO John Roese has a tough, but important job trying to keep EMC, VMware and Pivotal all moving in the same direction. While the three are separate companies, their fates are also very much aligned.<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=643152&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
				<content:encoded><![CDATA[<p>If you&#8217;re confused about all the action with EMC, VMware and Pivotal over the past several months, you&#8217;re not alone. CEOs <a href="http://gigaom.com/2012/07/17/maritz-is-out-as-vmware-ceo-but-takes-strategic-role-at-emc/">have traded places,</a> joint ventures <a href="http://gigaom.com/2013/03/13/the-pivotal-initiative-in-case-you-were-wondering-is-now-official/">have been struck</a>, product lines <a href="http://gigaom.com/2013/05/01/vmware-garage-sale-continues-as-it-offloads-wavemaker-to-pramati/">have been sold</a> and GE <a href="http://gigaom.com/2013/04/24/ge-to-pour-105m-into-emc-and-vmwares-pivotal-initiative/">even came on board</a>. And that&#8217;s before you even start talking about all the new technology.</p>
<p>I sat down with EMC SVP and CTO John Roese on Tuesday at the company&#8217;s annual EMC World conference to find out what&#8217;s up. Here&#8217;s what he had to say.</p>
<h2 id="on-three-companies-under-one-r">On three companies under one roof</h2>
<p>While they&#8217;re technically three separate companies, EMC is really in control. It&#8217;s the majority shareholder in VMware and owns more than 60 percent of Pivotal, its new joint venture with VMware that includes the <a href="http://gigaom.com/2013/02/25/emc-to-hadoop-competition-see-ya-wouldnt-wanna-be-ya/">Greenplum</a>, <a href="http://gigaom.com/2012/03/16/exclusive-emc-buys-pivotal-labs/">Pivotal Labs</a>, <a href="http://gigaom.com/2012/05/15/can-vmware-draw-developers-developers-developers/">SpringSource</a>, <a href="http://gigaom.com/2013/03/07/for-sale-from-pivotal-initiative-cloud-foundry/">Cloud Foundry</a> and <a href="http://gigaom.com/2012/04/24/vmware-buys-big-data-startup-cetas/">Cetas</a> business lines. When it comes to everyone working toward a common goal, Roese said, &#8220;The good news is that while there is independence, Joe Tucci is the chairman of all these companies.&#8221;</p>
<p>Roese calls himself the &#8220;gravitational center&#8221; of the three companies when it comes to technology. This is a reinvention of the CTO role at EMC, which used to be more of a research position. Now, he puts the stake in the ground and generally directs everyone toward it, even if they&#8217;re not all taking the same path to get there.</p>
<h2 id="on-why-pivotal-happened-and-wh">On why Pivotal happened and why it matters</h2>
<p>My takeaway from Roese&#8217;s comments on formation of Pivotal is that Greenplum is really the linchpin of the whole company. At its core, Pivotal is about building big data infrastructure <a href="http://gigaom.com/2013/03/19/the-world-is-ready-for-the-consumer-grade-enterprise/">that can handle next-generation workloads</a>, but it&#8217;s aware that broad adoption is only possible if that high technology becomes easier to consume. That means new higher-level applications, which is where SpringSource, Cloud Foundry and Pivotal Labs come into play.</p>
<p>All of this technically could have been accomplished by just selling Greenplum and Pivotal Labs (the only assets of the new company that were under the EMC umbrella) to VMware, but Roese said VMware wasn&#8217;t the right home because VMware is not so important in the places where next-generation workloads are popping up. There&#8217;s not a lot of VMware inside carriers&#8217; data centers, he acknowledged, but <a href="http://gigaom.com/2013/04/14/rackspace-wants-to-be-the-openstack-provider-to-the-stars/">there is a lot of OpenStack popping up</a>. And there&#8217;s a lot of Amazon Web Services everywhere you look.</p>
<p>&#8220;We would like the big data infrastructure to not care about that,&#8221; Roese explained. From EMC&#8217;s perspective, it doesn&#8217;t need to own the middle &#8212; the cloud operating system, if you will &#8212; if it can still engage customers at the storage and application-platform layers.</p>
<h2 id="on-keeping-independent-while-w">On keeping independent while working an &#8216;unfair advantage&#8217;</h2>
<p>Roese doesn&#8217;t think a vertically integrated approach is the best way to do business in today&#8217;s technology world, which is why EMC, VMware and Pivotal all operate independently and no one relies on another in order to work within customers&#8217; data centers. That&#8217;s why VMware <a href="http://gigaom.com/2013/03/13/vmwares-hybrid-vcloud-takes-on-amazon-kinda/">has its own cloud computing efforts</a> but Pivotal is cloud-agnostic, why EMC storage can operate with any higher-level software and why VMware doesn&#8217;t care about what&#8217;s running underneath or, usually, above it.</p>
<p>However, he added, it&#8217;s only natural the three companies seek an &#8220;unfair advantage&#8221; from the incestuous bonds they share. What he means, of course, is that they should keep a close eye on what the others are doing and work together to ensure they&#8217;re all optimized for the same types of workloads. For example, Roese said, if EMC didn&#8217;t reconsider how storage had to perform given that virtualization is the norm or that technology like Hadoop exists, it would &#8220;become suboptimal or generic.&#8221;</p>
<p>The same holds true for Pivotal and VMware. Pivotal needs to think about <a href="http://gigaom.com/2012/06/13/vmware-aims-for-hadoop-on-vms-with-serengeti-project/">how big data applications run on virtualized resources</a> differently than on big bare metal systems, as well as on flash-based arrays like what EMC is about to roll out based on its <a href="http://gigaom.com/2012/05/10/emc-goes-all-flash-buys-xtremio-for-430m/">XtremIO acquisition</a>. VMware and EMC need to think about how their <a href="http://gigaom.com/2013/03/13/vmware-to-virtualize-networks-with-software-incorporating-niciras-capabilities/">software-defined data center</a> and <a href="http://gigaom.com/2013/05/06/emc-plots-software-defined-data-center-journey-from-vipr-storage-virtualization-base/">software-defined storage</a> approaches can build off each other.</p>
<p>From EMC&#8217;s perspective, it&#8217;s easy to see why this all matters. It is at its core an information infrastructure company, but &#8220;the challenging thing with that is that it&#8217;s a moving target,&#8221; Roese said. A company like EMC can&#8217;t get by on storage arrays alone anymore, but it also can&#8217;t be dumb enough to think it can be everything to everyone and still be good at anything.</p>
<br />  <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=643152&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" /><p><a href="http://pubads.g.doubleclick.net/gampad/jump?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=448269"><img src="http://pubads.g.doubleclick.net/gampad/ad?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=448269" /></a></p><p><strong>Related research and analysis from GigaOM Pro:</strong><br />Subscriber content. <a href="http://pro.gigaom.com/?utm_source=cloud&utm_medium=editorial&utm_campaign=auto3&utm_term=643152+how-emcs-cto-is-trying-to-keep-emc-vmware-and-pivotal-orbiting-the-same-sun&utm_content=dharrisstructure">Sign up for a free trial</a>.</p><ul><li><a href="http://pro.gigaom.com/2012/04/infrastructure-q1-cloud-and-big-data-woo-the-enterprise/?utm_source=cloud&utm_medium=editorial&utm_campaign=auto3&utm_term=643152+how-emcs-cto-is-trying-to-keep-emc-vmware-and-pivotal-orbiting-the-same-sun&utm_content=dharrisstructure">Infrastructure Q1: Cloud and big data woo enterprises</a></li><li><a href="http://pro.gigaom.com/2012/03/a-near-term-outlook-for-big-data/?utm_source=cloud&utm_medium=editorial&utm_campaign=auto3&utm_term=643152+how-emcs-cto-is-trying-to-keep-emc-vmware-and-pivotal-orbiting-the-same-sun&utm_content=dharrisstructure">A near-term outlook for big data</a></li><li><a href="http://pro.gigaom.com/2012/10/cloud-and-data-third-quarter-2012-analysis-and-outlook/?utm_source=cloud&utm_medium=editorial&utm_campaign=auto3&utm_term=643152+how-emcs-cto-is-trying-to-keep-emc-vmware-and-pivotal-orbiting-the-same-sun&utm_content=dharrisstructure">Cloud and data third-quarter 2012</a></li></ul>]]></content:encoded>
			<wfw:commentRss>http://gigaom.com/2013/05/07/how-emcs-cto-is-trying-to-keep-emc-vmware-and-pivotal-orbiting-the-same-sun/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>

		<!-- Thumbnail, followed by the same image URL as a titled media:content entry. -->
		<media:thumbnail url="http://gigaom2.files.wordpress.com/2013/05/john_roese_225.jpg?w=150"/>
		<media:content medium="image" url="http://gigaom2.files.wordpress.com/2013/05/john_roese_225.jpg?w=150">
			<media:title type="html">John_Roese_225</media:title>
		</media:content>

		<!-- Gravatar image, titled "dharrisstructure". -->
		<media:content medium="image" url="http://0.gravatar.com/avatar/9e48ffa0913f65c577727457dd63023f?s=96&amp;d=retro&amp;r=PG">
			<media:title type="html">dharrisstructure</media:title>
		</media:content>
	</item>
		<item>
		<!-- Entry identity: headline, canonical link, comments anchor, publication date, author. -->
		<title>With Impala now GA, Cloudera’s CEO sizes up the SQL-on-Hadoop market</title>
		<link>http://gigaom.com/2013/04/30/with-impala-now-ga-clouderas-ceo-sizes-up-the-sql-on-hadoop-market/</link>
		<comments>http://gigaom.com/2013/04/30/with-impala-now-ga-clouderas-ceo-sizes-up-the-sql-on-hadoop-market/#comments</comments>
		<pubDate>Tue, 30 Apr 2013 13:00:40 +0000</pubDate>
		<dc:creator>Derrick Harris</dc:creator>

		<!-- Category labels, in the order the feed emitted them. -->
		<category>big data</category>
		<category>Cloudera</category>
		<category>data warehouse</category>
		<category>Greenplum</category>
		<category>Hadoop</category>
		<category>hive</category>
		<category>Hortonworks</category>
		<category>Impala</category>
		<category>Mapr</category>
		<category>open source</category>
		<category>SQL</category>
		<category>SQL on Hadoop</category>

		<!-- isPermaLink="false": the guid is an identifier string, not a link to follow. -->
		<guid isPermaLink="false">http://gigaom.com/?p=640777</guid>
		<description><![CDATA[Cloudera's Impala engine for interactive SQL queries on Hadoop data is now generally available, and CEO Mike Olson gives his lay of the competitive landscape.<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=640777&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
				<content:encoded><![CDATA[<p>There is no shortage of confidence in the Hadoop space, and market leader Cloudera bolstered its own on Tuesday with the general availability of its Impala SQL query engine for Hadoop. And if CEO Mike Olson&#8217;s comments are any indication, we&#8217;re in for a long ride of competitive jockeying and oneupmanship as Cloudera and its peers go all Microsoft or Google and create myriad new data-processing engines to turn their Hadoop distributions into bona fide platforms.</p>
<p>Launched as a private beta in May 2012 and <a href="http://gigaom.com/2012/10/24/cloudera-makes-sql-a-first-class-citizen-in-hadoop/">made public in October</a>, Impala is Cloudera&#8217;s attempt to address the growing demand for interactive SQL analytics on Hadoop data. It&#8217;s essentially a massively parallel database designed to share the same storage platform and metadata as Hadoop MapReduce, only it is its own separate processing engine.</p>
<div id="attachment_640848" class="wp-caption alignleft" style="width: 310px"><a href="http://gigaom2.files.wordpress.com/2013/04/impala-arch-new.jpg"><img  alt="How Impala fits in" src="http://gigaom2.files.wordpress.com/2013/04/impala-arch-new.jpg?w=300&#038;h=257" width="300" height="257" class="size-medium wp-image-640848" /></a><p class="wp-caption-text">How Impala fits in</p></div>
<p>Impala actually uses the same &#8220;nearly ANSI&#8221; version of SQL as does current standard bearer Hive, but that technology (created by Facebook in 2009 as a data warehouse layer for Hadoop) doesn&#8217;t run nearly fast enough to sate many users&#8217; desire for interactive analytics. This is because Hive transforms SQL queries into MapReduce jobs, meaning every one is processed against the entire corpus of data in the Hadoop Distributed File System.</p>
<h2 id="sizing-up-the-competition">Sizing up the competition</h2>
<p>Only Cloudera isn&#8217;t the first to have the idea, <a href="http://gigaom.com/2013/02/21/sql-is-whats-next-for-hadoop-heres-whos-doing-it/">nor is it alone in trying to sell interactive SQL on Hadoop</a>. The idea was <a href="http://gigaom.com/2011/10/21/hadapt-raises-9-5m-for-hadoop-data-warehouse/">first commercialized by Boston-based startup Hadapt</a> in 2011, and is now being pushed by numerous startups and larger Hadoop players. Among them: Pivotal (formerly EMC) Greenplum, MapR (with <a href="http://gigaom.com/2012/08/17/for-fast-interactive-hadoop-queries-drill-may-be-the-answer/">Drill</a>), Hortonworks (with <a href="http://hortonworks.com/blog/100x-faster-hive/">Stinger</a>), Drawn to Scale, Splice Machine, Jethro Data and Citus Data.</p>
<div id="attachment_640858" class="wp-caption aligncenter" style="width: 600px"><a href="http://gigaom2.files.wordpress.com/2013/04/had_graphic2-scaled.jpg"><img  alt="Hadapt's architecture" src="http://gigaom2.files.wordpress.com/2013/04/had_graphic2-scaled.jpg?w=708"   class="size-full wp-image-640858" /></a><p class="wp-caption-text">Hadapt&#8217;s architecture</p></div>
<p>But Cloudera is arguably the biggest name pushing SQL on Hadoop, and CEO Mike Olson thinks Impala stands out for several reasons &#8212; not the least of which is that it exists as a product. &#8220;Nobody else is shipping production-grade SQL query support on Hadoop,&#8221; he told me during a recent call. &#8220;At least not in open source.&#8221; He seems content to let the startups do their things, instead focusing his attention on Cloudera&#8217;s big three Hadoop-distribution competitors in Pivotal, MapR and Hortonworks. Greenplum and Pivotal SVP Scott Yara <a href="http://gigaom.com/2013/02/25/emc-to-hadoop-competition-see-ya-wouldnt-wanna-be-ya/">was full of confidence &#8212; and R&amp;D budget</a> &#8212; when the company announced the Pivotal HD distribution and HAWQ technology in February, but Olson claims the approach requires a siloed DBMS within HDFS and is a &#8220;rearguard defensive strategy&#8221; to protect the company&#8217;s sunk costs in its database technology.</p>
<div id="attachment_615210" class="wp-caption aligncenter" style="width: 718px"><a href="http://gigaom2.files.wordpress.com/2013/02/hawq1.jpg"><img  alt="The Pivotal HD and Hawq architecture" src="http://gigaom2.files.wordpress.com/2013/02/hawq1.jpg?w=708&#038;h=387" width="708" height="387" class="size-large wp-image-615210" /></a><p class="wp-caption-text">The Pivotal HD and Hawq architecture</p></div>
<p>As for Hortonworks, Olson questions the wisdom of its Stinger initiative to boost Hive&#8217;s speed, noting that &#8220;Hive never got good while it was running standalone on MapReduce.&#8221; Hortonworks also <a href="http://gigaom.com/2013/04/15/teradata-to-connect-hadoop-and-data-warehouses-roll-out-new-appliance/">partners with vendors such as Teradata</a> to let their platforms access Hadoop data in its native format, but those approaches still require sending data over the network. &#8220;It&#8217;s not the way you would build it if you woke up in the 2000s and were building this anew,&#8221; Olson said.</p>
<div id="attachment_640854" class="wp-caption aligncenter" style="width: 718px"><a href="http://gigaom2.files.wordpress.com/2013/04/stingerroad.png"><img  alt="The Stinger roadmap" src="http://gigaom2.files.wordpress.com/2013/04/stingerroad.png?w=708&#038;h=558" width="708" height="558" class="size-large wp-image-640854" /></a><p class="wp-caption-text">The Stinger roadmap</p></div>
<p>Olson acknowledged that the MapR-led Apache Drill project is cut from the same cloth as Impala (that is, being a Google Dremel clone designed specifically for Hadoop), but &#8220;the difference is we&#8217;re shipping code.&#8221; Being generally available and ready for production workloads means Cloudera can lock down users and market share before many even have a chance to experiment with Drill. He all but dismissed questions over the readiness of Impala, spurred by rumblings in the Hadoop space that Cloudera rushed it into public beta in order to get on the scoreboard against more fully baked offerings.</p>
<p>&#8220;I don&#8217;t feel we&#8217;re under the gun competitively to pull it out of beta because no one else has product in the market,&#8221; Olson said. &#8220;I have no problems &#8230; calling this GA quality.&#8221; He did, however, acknowledge that Impala is shipping with a &#8220;minimum viable feature set&#8221; that the company has plans to build on in the near future. Impala Senior Product Manager Justin Erickson noted a few issues of concern, including around the number of concurrent users Impala can support, but said they have been addressed during the beta period.</p>
<h2 id="one-piece-of-a-larger-platform">One piece of a larger platform</h2>
<p>Really, though, the whole point of Impala and its competitors is to turn Hadoop from a tool for batch analytics and mass storage <a href="http://gigaom.com/2013/03/07/5-reasons-why-the-future-of-hadoop-is-real-time-relatively-speaking/">into a platform that can handle nearly all of companies&#8217; data-processing needs</a>. In that regard, it appears we&#8217;re just getting started. Cloudera, MapR, Pivotal Greenplum and Hortonworks are already pushing their own products and projects, and Olson said &#8220;it&#8217;s absolutely our intent&#8221; to enhance Cloudera&#8217;s platform with even more open-source products &#8212; perhaps even more database technologies <a href="http://gigaom.com/2013/04/22/how-hbase-converted-myspaces-mysql-champion-and-is-driving-hadoop-mainstream/">a la HBase</a> &#8212; that will let users do more stuff with more types of data. Over time, this strategy could result in Hadoop displacing the current breed of databases and data warehouses and becoming the single data store atop of which users run whatever applications they so desire. For now, though, especially when it comes to Impala and the data warehouse incumbents, Olson is taking a measured approach. &#8220;The likelihood that we&#8217;re going to knock them off in the near term,&#8221; he said, &#8220;&#8230; it would be a tough fight to win.&#8221;</p>
<br />  <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=640777&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" /><p><a href="http://pubads.g.doubleclick.net/gampad/jump?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=250017"><img src="http://pubads.g.doubleclick.net/gampad/ad?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=250017" /></a></p><p><strong>Related research and analysis from GigaOM Pro:</strong><br />Subscriber content. <a href="http://pro.gigaom.com/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=640777+with-impala-now-ga-clouderas-ceo-sizes-up-the-sql-on-hadoop-market&utm_content=dharrisstructure">Sign up for a free trial</a>.</p><ul><li><a href="http://pro.gigaom.com/2012/03/a-near-term-outlook-for-big-data/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=640777+with-impala-now-ga-clouderas-ceo-sizes-up-the-sql-on-hadoop-market&utm_content=dharrisstructure">A near-term outlook for big data</a></li><li><a href="http://pro.gigaom.com/2012/04/infrastructure-q1-cloud-and-big-data-woo-the-enterprise/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=640777+with-impala-now-ga-clouderas-ceo-sizes-up-the-sql-on-hadoop-market&utm_content=dharrisstructure">Infrastructure Q1: Cloud and big data woo enterprises</a></li><li><a href="http://pro.gigaom.com/2012/04/sector-roadmap-hadoop-platforms-2012/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=640777+with-impala-now-ga-clouderas-ceo-sizes-up-the-sql-on-hadoop-market&utm_content=dharrisstructure">2012: The Hadoop infrastructure market booms</a></li></ul>]]></content:encoded>
			<wfw:commentRss>http://gigaom.com/2013/04/30/with-impala-now-ga-clouderas-ceo-sizes-up-the-sql-on-hadoop-market/feed/</wfw:commentRss>
		<slash:comments>1</slash:comments>

		<!-- Thumbnail, followed by the same image URL as a titled media:content entry. -->
		<media:thumbnail url="http://gigaom2.files.wordpress.com/2013/01/1z5o1503.jpg?w=150"/>
		<media:content medium="image" url="http://gigaom2.files.wordpress.com/2013/01/1z5o1503.jpg?w=150">
			<media:title type="html">Structure Data 2012: Michael Olson – CEO, Cloudera</media:title>
		</media:content>

		<!-- Gravatar image, titled "dharrisstructure". -->
		<media:content medium="image" url="http://0.gravatar.com/avatar/9e48ffa0913f65c577727457dd63023f?s=96&amp;d=retro&amp;r=PG">
			<media:title type="html">dharrisstructure</media:title>
		</media:content>

		<!-- Images whose titles repeat the captions of the figures embedded in content:encoded. -->
		<media:content medium="image" url="http://gigaom2.files.wordpress.com/2013/04/impala-arch-new.jpg?w=300">
			<media:title type="html">How Impala fits in</media:title>
		</media:content>

		<media:content medium="image" url="http://gigaom2.files.wordpress.com/2013/04/had_graphic2-scaled.jpg">
			<media:title type="html">Hadapt's architecture</media:title>
		</media:content>

		<media:content medium="image" url="http://gigaom2.files.wordpress.com/2013/02/hawq1.jpg?w=708">
			<media:title type="html">The Pivotal HD and Hawq architecture</media:title>
		</media:content>

		<media:content medium="image" url="http://gigaom2.files.wordpress.com/2013/04/stingerroad.png?w=708">
			<media:title type="html">The Stinger roadmap</media:title>
		</media:content>
	</item>
		<item>
		<title>Cloud and data first-quarter 2013: analysis and outlook</title>
		<link>http://pro.gigaom.com/report/cloud-and-data-first-quarter-2013-analysis-and-outlook/</link>
		<comments>http://pro.gigaom.com/report/cloud-and-data-first-quarter-2013-analysis-and-outlook/#comments</comments>
		<pubDate>Tue, 09 Apr 2013 06:55:36 +0000</pubDate>
		<dc:creator>David S. Linthicum</dc:creator><!-- author profile: http://pro.gigaom.com/members/davidlinthicum/ -->
				<category><![CDATA[Uncategorized]]></category>
		<category><![CDATA[Amazon]]></category>
		<category><![CDATA[Amazon cloud computing]]></category>
		<category><![CDATA[Amazon Web Services]]></category>
		<category><![CDATA[amazon-elastic-compute-cloud]]></category>
		<category><![CDATA[Amazon.com]]></category>
		<category><![CDATA[apache-hadoop]]></category>
		<category><![CDATA[apple inc.]]></category>
		<category><![CDATA[aws]]></category>
		<category><![CDATA[Azure Services Platform]]></category>
		<category><![CDATA[big data]]></category>
		<category><![CDATA[central-intelligence-agency]]></category>
		<category><![CDATA[Centralized computing]]></category>
		<category><![CDATA[CIA]]></category>
		<category><![CDATA[Cisco]]></category>
		<category><![CDATA[Cisco Systems]]></category>
		<category><![CDATA[Client/Server]]></category>
		<category><![CDATA[Cloud]]></category>
		<category><![CDATA[Cloud Computing]]></category>
		<category><![CDATA[cloud computing services]]></category>
		<category><![CDATA[Cloud computing taxes]]></category>
		<category><![CDATA[Cloud Storage]]></category>
		<category><![CDATA[cloud storage services]]></category>
		<category><![CDATA[cloud technology]]></category>
		<category><![CDATA[cloud-applications]]></category>
		<category><![CDATA[cloud-based storage services]]></category>
		<category><![CDATA[cloud-infrastructure]]></category>
		<category><![CDATA[Cloudera]]></category>
		<category><![CDATA[CloudMe]]></category>
		<category><![CDATA[computing]]></category>
		<category><![CDATA[consumer-oriented cloud storage services]]></category>
		<category><![CDATA[data management]]></category>
		<category><![CDATA[data processing store]]></category>
		<category><![CDATA[Data Synchronization]]></category>
		<category><![CDATA[database management systems]]></category>
		<category><![CDATA[database technology]]></category>
		<category><![CDATA[DataDirect Networks]]></category>
		<category><![CDATA[Datameer]]></category>
		<category><![CDATA[Dropbox]]></category>
		<category><![CDATA[EMC]]></category>
		<category><![CDATA[file hosting]]></category>
		<category><![CDATA[File system-sharing services]]></category>
		<category><![CDATA[firewall]]></category>
		<category><![CDATA[Google]]></category>
		<category><![CDATA[Greenplum]]></category>
		<category><![CDATA[Hadoop]]></category>
		<category><![CDATA[Hewlett-Packard]]></category>
		<category><![CDATA[HP]]></category>
		<category><![CDATA[IBM]]></category>
		<category><![CDATA[icloud]]></category>
		<category><![CDATA[Idaho State Tax Commission]]></category>
		<category><![CDATA[Income taxes]]></category>
		<category><![CDATA[Intel]]></category>
		<category><![CDATA[iPad]]></category>
		<category><![CDATA[iPhone]]></category>
		<category><![CDATA[Joyent]]></category>
		<category><![CDATA[Linux]]></category>
		<category><![CDATA[Macquarie Capital]]></category>
		<category><![CDATA[Mapr]]></category>
		<category><![CDATA[massively parallel processing]]></category>
		<category><![CDATA[Microsoft]]></category>
		<category><![CDATA[microsoft-windows]]></category>
		<category><![CDATA[mobile device]]></category>
		<category><![CDATA[MongoDB]]></category>
		<category><![CDATA[Nimbula]]></category>
		<category><![CDATA[NoSQL]]></category>
		<category><![CDATA[ObjectRocket]]></category>
		<category><![CDATA[Online backup services]]></category>
		<category><![CDATA[open source]]></category>
		<category><![CDATA[OpenStack]]></category>
		<category><![CDATA[Oracle]]></category>
		<category><![CDATA[oracle-corporation]]></category>
		<category><![CDATA[oracle-database]]></category>
		<category><![CDATA[parallel processing]]></category>
		<category><![CDATA[private clouds]]></category>
		<category><![CDATA[Public Clouds]]></category>
		<category><![CDATA[Rackspace]]></category>
		<category><![CDATA[Relational database]]></category>
		<category><![CDATA[relational database management systems]]></category>
		<category><![CDATA[saleseforce-com]]></category>
		<category><![CDATA[Salesforce.com]]></category>
		<category><![CDATA[SAN]]></category>
		<category><![CDATA[smartphone]]></category>
		<category><![CDATA[smartphones]]></category>
		<category><![CDATA[software]]></category>
		<category><![CDATA[software delivery]]></category>
		<category><![CDATA[SQL]]></category>
		<category><![CDATA[Star Analytics]]></category>
		<category><![CDATA[storage-area-network]]></category>
		<category><![CDATA[Tablet computer]]></category>
		<category><![CDATA[Teradata]]></category>
		<category><![CDATA[U.S. government]]></category>

		<guid isPermaLink="false">http://pro.gigaom.com/?post_type=go-report&#038;p=173124/</guid>
		<description><![CDATA[Cloud computing is finally starting to add value to business, as those in charge of cloud within enterprises are moving from talking to doing. That much was very evident in the first quarter of 2013.<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=648537&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
				<content:encoded><![CDATA[<p>Cloud computing is finally starting to add value to business, as those in charge of cloud within enterprises are moving from talking to doing. That much was very evident in the first quarter of 2013.</p>
<br />  <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=648537&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" /><p><a href="http://pubads.g.doubleclick.net/gampad/jump?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=261991"><img src="http://pubads.g.doubleclick.net/gampad/ad?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=261991" /></a></p><p><strong>Related research and analysis from GigaOM Pro:</strong><br />Subscriber content. <a href="http://pro.gigaom.com/?utm_medium=editorial&utm_campaign=auto3&utm_term=648537+cloud-and-data-first-quarter-2013-analysis-and-outlook&utm_content=gigaedit">Sign up for a free trial</a>.</p><ul><li><a href="http://pro.gigaom.com/2011/04/infrastructure-q1-iaas-comes-down-to-earth-big-data-takes-flight/?utm_medium=editorial&utm_campaign=auto3&utm_term=648537+cloud-and-data-first-quarter-2013-analysis-and-outlook&utm_content=gigaedit">Infrastructure Q1: IaaS Comes Down to Earth; Big Data Takes Flight</a></li><li><a href="http://pro.gigaom.com/2012/04/infrastructure-q1-cloud-and-big-data-woo-the-enterprise/?utm_medium=editorial&utm_campaign=auto3&utm_term=648537+cloud-and-data-first-quarter-2013-analysis-and-outlook&utm_content=gigaedit">Infrastructure Q1: Cloud and big data woo enterprises</a></li><li><a href="http://pro.gigaom.com/2011/07/infrastructure-q2-big-data-and-paas-gain-more-momentum/?utm_medium=editorial&utm_campaign=auto3&utm_term=648537+cloud-and-data-first-quarter-2013-analysis-and-outlook&utm_content=gigaedit">Infrastructure Q2: Big data and PaaS gain more momentum</a></li></ul>]]></content:encoded>
			<wfw:commentRss>http://pro.gigaom.com/report/cloud-and-data-first-quarter-2013-analysis-and-outlook/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:thumbnail url="http://pro.gigaom.com/files/2009/04/gigaompromasterimagecloud.jpg?w=150" />
		<media:content url="http://pro.gigaom.com/files/2009/04/gigaompromasterimagecloud.jpg?w=150" medium="image">
			<media:title type="html">gigaompromasterimagecloud</media:title>
		</media:content>

		<media:content url="http://1.gravatar.com/avatar/4f3860069d181dbeeb398304f5940a9e?s=96&#38;d=retro&#38;r=PG" medium="image">
			<media:title type="html">gigaedit</media:title>
		</media:content>
	</item>
		<item>
		<title>Sector RoadMap: SQL-on-Hadoop platforms in 2013</title>
		<link>http://pro.gigaom.com/report/sql-on-hadoop-roadmap-2013/</link>
		<comments>http://pro.gigaom.com/report/sql-on-hadoop-roadmap-2013/#comments</comments>
		<pubDate>Wed, 20 Mar 2013 12:00:16 +0000</pubDate>
		<dc:creator><![CDATA[<a href="http://pro.gigaom.com/members/josephturian/" rel="author">Joseph Turian</a>]]></dc:creator>
				<category><![CDATA[Uncategorized]]></category>
		<category><![CDATA[apache]]></category>
		<category><![CDATA[apache-hive]]></category>
		<category><![CDATA[aster]]></category>
		<category><![CDATA[Aster Big Analytics Appliance]]></category>
		<category><![CDATA[big data]]></category>
		<category><![CDATA[BigInsights]]></category>
		<category><![CDATA[Citus Data]]></category>
		<category><![CDATA[CitusDB]]></category>
		<category><![CDATA[Cloud Computing]]></category>
		<category><![CDATA[Cloudera]]></category>
		<category><![CDATA[Clustrix]]></category>
		<category><![CDATA[Concurrent]]></category>
		<category><![CDATA[Database theory]]></category>
		<category><![CDATA[Dremel]]></category>
		<category><![CDATA[Drill]]></category>
		<category><![CDATA[EMC]]></category>
		<category><![CDATA[Google]]></category>
		<category><![CDATA[Greenplum]]></category>
		<category><![CDATA[Hadapt]]></category>
		<category><![CDATA[Hadoop]]></category>
		<category><![CDATA[Hadoop Distributed File System]]></category>
		<category><![CDATA[HAWQ]]></category>
		<category><![CDATA[Hbase]]></category>
		<category><![CDATA[HCatalog]]></category>
		<category><![CDATA[HDFS]]></category>
		<category><![CDATA[hive]]></category>
		<category><![CDATA[Hortonworks]]></category>
		<category><![CDATA[IBM]]></category>
		<category><![CDATA[Impala]]></category>
		<category><![CDATA[JethroData]]></category>
		<category><![CDATA[karmasphere]]></category>
		<category><![CDATA[Lingual]]></category>
		<category><![CDATA[Mapr]]></category>
		<category><![CDATA[mapreduce]]></category>
		<category><![CDATA[MemSQL]]></category>
		<category><![CDATA[microstrategy]]></category>
		<category><![CDATA[MongoDB]]></category>
		<category><![CDATA[MPP]]></category>
		<category><![CDATA[NewSQL]]></category>
		<category><![CDATA[Optiq]]></category>
		<category><![CDATA[Oracle]]></category>
		<category><![CDATA[parallel computing]]></category>
		<category><![CDATA[pig]]></category>
		<category><![CDATA[Platfora]]></category>
		<category><![CDATA[PostGIS]]></category>
		<category><![CDATA[PostgreSQL]]></category>
		<category><![CDATA[PostreSQL]]></category>
		<category><![CDATA[RainStor]]></category>
		<category><![CDATA[Salesforce.com]]></category>
		<category><![CDATA[SAP]]></category>
		<category><![CDATA[SAP HANA]]></category>
		<category><![CDATA[Splice Machine]]></category>
		<category><![CDATA[SQL]]></category>
		<category><![CDATA[SQL 92]]></category>
		<category><![CDATA[SQL-H]]></category>
		<category><![CDATA[SQLStream]]></category>
		<category><![CDATA[Stinger]]></category>
		<category><![CDATA[Stringer]]></category>
		<category><![CDATA[tableau]]></category>
		<category><![CDATA[Teradata]]></category>
		<category><![CDATA[Twitter]]></category>
		<category><![CDATA[VoltDB]]></category>
		<category><![CDATA[zookeeper]]></category>

		<guid isPermaLink="false">http://pro.gigaom.com/?post_type=go-report&#038;p=171512/</guid>
		<description><![CDATA[Today’s most successful companies are the ones with the ability to capture and analyze all data available to them. Enter SQL-on-Hadoop solutions, which increase the accessibility of Hadoop and allow organizations to reuse their investment learning in SQL. <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=648564&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
				<content:encoded><![CDATA[<p>Today’s most successful companies are the ones with the ability to capture and analyze all data available to them. Enter SQL-on-Hadoop solutions, which increase the accessibility of Hadoop and allow organizations to reuse their investment learning in SQL. </p>
<br />  <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=648564&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" /><p><a href="http://pubads.g.doubleclick.net/gampad/jump?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=143905"><img src="http://pubads.g.doubleclick.net/gampad/ad?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=143905" /></a></p><p><strong>Related research and analysis from GigaOM Pro:</strong><br />Subscriber content. <a href="http://pro.gigaom.com/?utm_medium=editorial&utm_campaign=auto3&utm_term=648564+sql-on-hadoop-roadmap-2013&utm_content=gigaedit">Sign up for a free trial</a>.</p><ul><li><a href="http://pro.gigaom.com/2012/03/a-near-term-outlook-for-big-data/?utm_medium=editorial&utm_campaign=auto3&utm_term=648564+sql-on-hadoop-roadmap-2013&utm_content=gigaedit">A near-term outlook for big data</a></li><li><a href="http://pro.gigaom.com/2012/04/sector-roadmap-hadoop-platforms-2012/?utm_medium=editorial&utm_campaign=auto3&utm_term=648564+sql-on-hadoop-roadmap-2013&utm_content=gigaedit">2012: The Hadoop infrastructure market booms</a></li><li><a href="http://pro.gigaom.com/2011/03/defining-hadoop-the-players-technologies-and-challenges-of-2011/?utm_medium=editorial&utm_campaign=auto3&utm_term=648564+sql-on-hadoop-roadmap-2013&utm_content=gigaedit">Defining Hadoop: the Players, Technologies and Challenges of 2011</a></li></ul>]]></content:encoded>
			<wfw:commentRss>http://pro.gigaom.com/report/sql-on-hadoop-roadmap-2013/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:thumbnail url="https://gigaom-pro-files.s3.amazonaws.com/files/2012/04/elephant.jpg?w=150" />
		<media:content url="https://gigaom-pro-files.s3.amazonaws.com/files/2012/04/elephant.jpg?w=150" medium="image">
			<media:title type="html">elephant</media:title>
		</media:content>

		<media:content url="http://1.gravatar.com/avatar/4f3860069d181dbeeb398304f5940a9e?s=96&#38;d=retro&#38;r=PG" medium="image">
			<media:title type="html">gigaedit</media:title>
		</media:content>
	</item>
		<item>
		<title>Meet Kaggle Connect: matchmaker for data scientists and companies that need them</title>
		<link>http://gigaom.com/2013/03/05/kaggle-connect-matchmaker-for-data-scientists-and-companies-that-need-them/</link>
		<comments>http://gigaom.com/2013/03/05/kaggle-connect-matchmaker-for-data-scientists-and-companies-that-need-them/#comments</comments>
		<pubDate>Tue, 05 Mar 2013 16:00:56 +0000</pubDate>
		<dc:creator>Barb Darrow</dc:creator>
				<category><![CDATA[Anthony Goldbloom]]></category>
		<category><![CDATA[big data]]></category>
		<category><![CDATA[EMC]]></category>
		<category><![CDATA[Greenplum]]></category>
		<category><![CDATA[kaggle]]></category>
		<category><![CDATA[Kaggle Connect]]></category>

		<guid isPermaLink="false">http://gigaom.com/?p=616635</guid>
		<description><![CDATA[Putting the right brains on the right problems is the goal of Kaggle Connect. <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=616635&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
				<content:encoded><![CDATA[<p>Here are two certainties about big data. One is that companies need good data scientists. The other is that identifying good data scientists ain&#8217;t easy. That&#8217;s why Kaggle, the <a href="http://gigaom.com/2012/09/12/can-kaggle-make-data-science-a-spectator-sport/">data science competition platform</a>, is launching Kaggle Connect to link proven data science performers with companies willing to pay for their expertise.</p>
<p>Everyone calls himself a data scientist now &#8212; and <a href="http://gigaom.com/2012/02/17/big-data-skills-bring-big-dough/">there&#8217;s a reason for that.</a> The title &#8220;gets you 40 percent more money,&#8221; says Kaggle CEO <a href="http://www.kaggle.com/careers/team">Anthony Goldbloom.</a> &#8220;The problem is that it&#8217;s hard to know how good someone really is until six months down the road when you realize they haven&#8217;t done anything.&#8221;</p>
<p>His argument is that folks who have done well in Kaggle competitions over the past two years &#8212; insurance actuaries, mathematicians, students, chemists &#8212; have proven they have what it takes.</p>
<p>And Kaggle bona fides are becoming currency. <a href="http://www.linkedin.com/jobs?viewJob=&amp;jobId=4902477">This job posting</a> for a <em>New York Times</em> data scientist lists participation in a Kaggle competition as a key criterion.</p>
<h2 id="connecting-the-right-data-scie">Connecting the right data scientists with the right problems</h2>
<p>With Kaggle Connect, the company is making its two top tiers of competitors &#8212; it&#8217;s an invitation-only list &#8212; available to companies on an individual basis. &#8220;If Pfizer comes to us with a problem that is maybe not well specified enough and needs more iteration than a competition would allow, we can provide a data scientist that suits that problem,&#8221; Goldbloom said.</p>
<p><a href="http://gigaom.com/2013/03/05/kaggle-connect-matchmaker-for-data-scientists-and-companies-that-need-them/kaggleranks2-2/" rel="attachment wp-att-616775"><img  alt="kaggleranks2" src="http://gigaom2.files.wordpress.com/2013/03/kaggleranks21.jpg?w=708&#038;h=428" width="708" height="428" class="aligncenter size-full wp-image-616775" /></a></p>
<p>The customer pays a subscription cost of somewhere between $30,000 and $100,000 per month to gain access to appropriate data science resources. Kaggle gets a cut of that money and the data scientist gets the rest &#8212; although Kaggle is not breaking out the percentages.</p>
<p>In the interactive chart below, click on the map to bring up the name, picture and profile of the Kaggle Connect member.<br />
<iframe src="http://kaggle.cartodb.com/tables/kaggle_connect_members/embed_map?title=true&amp;description=true&amp;search=false&amp;shareable=false&amp;cartodb_logo=true&amp;sql=&amp;zoom=0&amp;center_lat=39.36827914916011&amp;center_lon=150.46875" height="400" width="400" frameborder="0"></iframe></p>
<p>What Kaggle brings to the table is a roster of people who have performed well in its competitions. What the companies provide is a juicy problem to solve and data to use in that quest. In some ways this is an extension of what Kaggle has already done with <a href="http://gigaom.com/2012/10/23/greenplum-kaggle-play-big-data-matchmakers/">EMC&#8217;s Greenplum division</a>, although that project required the use of Greenplum&#8217;s Chorus toolset.<br />
<a href="http://gigaom.com/?attachment_id=616731" rel="attachment wp-att-616731"><img  alt="kaggleuserspecialty" src="http://gigaom2.files.wordpress.com/2013/03/kaggleuserspecialty.jpg?w=708&#038;h=395" width="708" height="395" class="aligncenter size-full wp-image-616731" /></a><br />
The top two of eight total tiers of 80,000 contestants will initially serve as the invitation-only talent pool for Kaggle Connect. That&#8217;s about 1,500 Kagglers (if that&#8217;s a word). Kaggle began running data science competitions in early 2012 and started <a href="http://gigaom.com/2012/09/12/can-kaggle-make-data-science-a-spectator-sport/">publishing its leaderboard</a> of top big data problem solvers last September.</p>
<p>We&#8217;ll see how this all proves out, but if Kaggle success is really a predictor of big data chops writ large, expect to see a lot more Kaggle boasts on resumes going forward.</p>
<p><em>Feature photo courtesy of Shutterstock user Dirk Ercken.</em></p>
<br />  <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=616635&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" /><p><a href="http://pubads.g.doubleclick.net/gampad/jump?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=573972"><img src="http://pubads.g.doubleclick.net/gampad/ad?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=573972" /></a></p><p><strong>Related research and analysis from GigaOM Pro:</strong><br />Subscriber content. <a href="http://pro.gigaom.com/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=616635+kaggle-connect-matchmaker-for-data-scientists-and-companies-that-need-them&utm_content=gigabarb">Sign up for a free trial</a>.</p><ul><li><a href="http://pro.gigaom.com/2012/03/a-near-term-outlook-for-big-data/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=616635+kaggle-connect-matchmaker-for-data-scientists-and-companies-that-need-them&utm_content=gigabarb">A near-term outlook for big data</a></li><li><a href="http://pro.gigaom.com/2012/01/12-tech-leaders-resolutions-for-2012/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=616635+kaggle-connect-matchmaker-for-data-scientists-and-companies-that-need-them&utm_content=gigabarb">12 tech leaders’ resolutions for 2012</a></li><li><a href="http://pro.gigaom.com/2012/04/infrastructure-q1-cloud-and-big-data-woo-the-enterprise/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=616635+kaggle-connect-matchmaker-for-data-scientists-and-companies-that-need-them&utm_content=gigabarb">Infrastructure Q1: Cloud and big data woo enterprises</a></li></ul>]]></content:encoded>
			<wfw:commentRss>http://gigaom.com/2013/03/05/kaggle-connect-matchmaker-for-data-scientists-and-companies-that-need-them/feed/</wfw:commentRss>
		<slash:comments>2</slash:comments>
	
		<media:thumbnail url="http://gigaom2.files.wordpress.com/2013/03/shutterstock_125574617.jpg?w=150" />
		<media:content url="http://gigaom2.files.wordpress.com/2013/03/shutterstock_125574617.jpg?w=150" medium="image">
			<media:title type="html">Big Data</media:title>
		</media:content>

		<media:content url="http://1.gravatar.com/avatar/4af03439988d64f816da72496325cb73?s=96&#38;d=retro&#38;r=PG" medium="image">
			<media:title type="html">gigabarb</media:title>
		</media:content>

		<media:content url="http://gigaom2.files.wordpress.com/2013/03/kaggleranks21.jpg" medium="image">
			<media:title type="html">kaggleranks2</media:title>
		</media:content>

		<media:content url="http://gigaom2.files.wordpress.com/2013/03/kaggleuserspecialty.jpg" medium="image">
			<media:title type="html">kaggleuserspecialty</media:title>
		</media:content>
	</item>
		<item>
		<title>EMC to Hadoop competition: &#8220;See ya, wouldn&#8217;t wanna be ya.&#8221;</title>
		<link>http://gigaom.com/2013/02/25/emc-to-hadoop-competition-see-ya-wouldnt-wanna-be-ya/</link>
		<comments>http://gigaom.com/2013/02/25/emc-to-hadoop-competition-see-ya-wouldnt-wanna-be-ya/#comments</comments>
		<pubDate>Mon, 25 Feb 2013 18:00:02 +0000</pubDate>
		<dc:creator>Derrick Harris</dc:creator>
				<category><![CDATA[analytics]]></category>
		<category><![CDATA[big data]]></category>
		<category><![CDATA[database]]></category>
		<category><![CDATA[EMC]]></category>
		<category><![CDATA[EMC Greenpum]]></category>
		<category><![CDATA[Greenplum]]></category>
		<category><![CDATA[Hadoop]]></category>
		<category><![CDATA[SQL]]></category>

		<guid isPermaLink="false">http://gigaom.com/?p=613686</guid>
		<description><![CDATA[EMC Greenplum rolled out a new Hadoop distribution that fuses the popular big data platform with its flagship MPP database technology. Co-founder Scott Yara thinks the company's huge investment puts it in the catbird seat among Hadoop vendors.<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=613686&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
				<content:encoded><![CDATA[<p>If, like many industry watchers, you’ve been confused about EMC Greenplum’s Hadoop strategy over the past couple years, Scott Yara has a message for you: “We’re all in on Hadoop, period.”</p>
<p>Yara, Greenplum’s co-founder and senior vice president of products, has a not-so-coded message for his big data market competitors, too. Put simply, he doesn’t think they stand a chance against his company, and he served notice on Monday morning with the unveiling of the company’s new Pivotal HD Hadoop distribution and Project Hawq in a staged event at San Francisco’s Dogpatch Studios.</p>
<p>Pivotal HD is a completely re-architected Hadoop distribution that has been natively fused with Greenplum’s analytic database (that’s the Project Hawq part), but Yara thinks it’s a bigger deal than <a href="http://gigaom.com/2013/02/21/sql-is-whats-next-for-hadoop-heres-whos-doing-it/">just another SQL-on-Hadoop play</a>. In an interview last week, Yara told me that Project Hawq is the manifestation of Greenplum’s <a href="http://gigaom.com/2010/07/06/emc-buys-greenplum/">decision to sell itself to EMC in 2010</a>, a move he thought would kickstart his company’s founding vision of becoming the leading big data platform.</p>
<h2 id="building-a-data-platform-costs">Building a data platform costs money, and lots of it</h2>
<p>But before the details, a little history. Greenplum’s flagship product is an analytic database powered by a massively parallel processing (MPP) and query engine. The company had raised nearly $100 million in venture capital around this technology since launching in 2003, but doing business in the enterprise software world is hard and expensive, and Greenplum needed more money.</p>
<div id="attachment_502146" class="wp-caption alignleft" style="width: 310px"><img alt="Rob Me of Pivotal Labs, Scott Yara of EMC, and Om Malik of GigaOM at Structure:Data 2012" src="http://gigaom2.files.wordpress.com/2012/03/1z5o1154.jpg?w=300&#038;h=200" width="300" height="200" class="size-medium wp-image-502146"><p class="wp-caption-text">Yara (left) with Pivotal Labs CEO Rob Me and Om Malik at Structure: Data 2012 (c) 2012 Pinar Ozger. pinar@pinarozger.com</p></div>
<p>“I thought it was going to take another couple hundred million dollars in investment for us to complete the technical vision we had and go to market,” Yara explained. But finding that kind of money wasn’t so easy in an investment environment where everyone was gaga over social apps like Facebook and Zynga. When EMC approached with a deal like it gave VMware in 2003 — essentially near complete independence bolstered by a huge R&amp;D and marketing budget — Greenplum couldn’t refuse.</p>
<p>Yara said Greenplum had known for a while that Hadoop was the key to any big data strategy going forward, but that it would take some time to build up its own technology. So, in 2011, it <a href="http://gigaom.com/2011/05/09/emc-hadoop/">entered into a reseller agreement with Hadoop startup MapR</a> to offer a premium product to appease enterprise customers while Greenplum’s engineers got to work on what would become Pivotal HD. That deal with MapR is still in place, but it’s no longer the focal point of Greenplum’s Hadoop strategy.</p>
<h2 id="big-investment-big-aspirations">Big investment, big aspirations</h2>
<p>The technology inside Pivotal HD is what companies should come to expect from a Hadoop distribution, Yara explained. It’s essentially the Greenplum Database with its POSIX file system ripped out and replaced by the Hadoop Distributed File System. Whatever users can do on Greenplum’s flagship database, they can do on Pivotal HD, only they can run Hadoop MapReduce jobs and house an HBase database, too.</p>
<p><img alt="hawq" src="http://gigaom2.files.wordpress.com/2013/02/hawq.jpg?w=708&#038;h=386" width="708" height="386" class="aligncenter size-large wp-image-613705"></p>
<p>And when SQL-like features become an important part of Hadoop because it’s so broadly installed that users are now seeking out broader utility, “that’s when the bar gets raised in terms of the amount of capability that’s required,” Yara said. He said Pivotal HD includes years worth of investment in Hadoop cluster-management technology and professional support, too, and that they will cost half as much as what Cloudera and Hortonworks charge. It’s designed to run smoothly wherever customers want it to — physical servers, virtual servers or even cloud servers.</p>
<p><a href="http://structuredata2013-editgraphic.eventbrite.com/"><img alt="Structure:Data: Put data to work. 60+ big data experts speaking. March 20-21, 2013, New York City. Register now." src="http://gigaom2.files.wordpress.com/2013/02/structure-data_in-article-banners_300x2001.png?w=708"   class="alignright size-full wp-image-610577"></a>Because they’re so new, he said, competitive SQL-on-Hadoop offerings such as <a href="http://gigaom.com/2012/10/24/cloudera-makes-sql-a-first-class-citizen-in-hadoop/">Cloudera’s Impala</a> can only handle about 20 percent of real-world workloads. Looking back at the capital investment in analytics and big data technologies past, things like Netezza, Teradata and Aster Data, Yara proffered, “I don’t think you could build [a full SQL-on-Hadoop] system for less than $25 to $50 million over five years.” (Some of those new technologies, by the way, will have a chance to state their cases during a <a href="http://event.gigaom.com/structuredata/?utm_source=data&amp;utm_medium=editorial&amp;utm_campaign=intext&amp;utm_term=613686+emc-to-hadoop-competition-see-ya-wouldnt-wanna-be-ya&amp;utm_content=dharrisstructure">Structure: Data</a> panel on March 21 that’s all about Hadoop as the next-generation business intelligence platform.)</p>
<p>Greenplum, by contrast, rebuilt its entire R&amp;D team to focus on bringing 10 years of database technology to Hadoop. “We literally have over 300 engineers working on our Hadoop platform,” Yara said. “… We’re bringing all the power of EMC and VMware behind it.”</p>
<h2 id="the-data-warehouse-is-the-new-">The data warehouse is the new mainframe</h2>
<p>Looking past his competitive boasting, though, it’s easy to see Yara’s greater point when you ask him what all this Hadoop talk means for the data warehouse business on which Greenplum was built. He points to the mainframe business that fell from its high perch decades ago but still drives billions a year in revenue. A single MPP database system is still faster on certain workloads than SQL on Hadoop, but that gap will close over time and “I do think the center of gravity will move toward HDFS,” he said.</p>
<p>Josh Klahr, a Pivotal HD product manager, noted the importance of being able to process all of a company’s data right in a single scalable data store rather than operating numerous systems. He pointed to one customer that’s storing a petabyte of data in Greenplum Database but wants to grow its data volume to 20 petabytes over the next few years and needs something like Hadoop to do that both financially and technically. He said Netflix’s <a href="http://gigaom.com/2013/01/10/netflix-shows-off-its-hadoop-architecture/">decision to store all its data in Amazon S3</a> and bring analytic services to it is a good indicator of where the market is headed.</p>
<p>A few years ago, Yara acknowledged, embracing Hadoop as the future might have been a scary proposition. However, he said, “Now, if you don’t embrace Hadoop as the new database platform, if you’re a database vendor, that’s a grave mistake.”</p>
<br />  <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=613686&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" /><p><a href="http://pubads.g.doubleclick.net/gampad/jump?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=904304"><img src="http://pubads.g.doubleclick.net/gampad/ad?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=904304" /></a></p><p><strong>Related research and analysis from GigaOM Pro:</strong><br />Subscriber content. <a href="http://pro.gigaom.com/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=613686+emc-to-hadoop-competition-see-ya-wouldnt-wanna-be-ya&utm_content=dharrisstructure">Sign up for a free trial</a>.</p><ul><li><a href="http://pro.gigaom.com/2012/04/infrastructure-q1-cloud-and-big-data-woo-the-enterprise/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=613686+emc-to-hadoop-competition-see-ya-wouldnt-wanna-be-ya&utm_content=dharrisstructure">Infrastructure Q1: Cloud and big data woo enterprises</a></li><li><a href="http://pro.gigaom.com/2012/03/a-near-term-outlook-for-big-data/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=613686+emc-to-hadoop-competition-see-ya-wouldnt-wanna-be-ya&utm_content=dharrisstructure">A near-term outlook for big data</a></li><li><a href="http://pro.gigaom.com/2010/09/the-red-hot-data-warehouse-market-whos-buying-next/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=613686+emc-to-hadoop-competition-see-ya-wouldnt-wanna-be-ya&utm_content=dharrisstructure">The Red-Hot Data Warehouse Market: Who&#8217;s Buying Next?</a></li></ul>]]></content:encoded>
			<wfw:commentRss>http://gigaom.com/2013/02/25/emc-to-hadoop-competition-see-ya-wouldnt-wanna-be-ya/feed/</wfw:commentRss>
		<slash:comments>6</slash:comments>
	
		<media:thumbnail url="http://gigaom2.files.wordpress.com/2013/01/1z5o1096.jpg?w=150" />
		<media:content url="http://gigaom2.files.wordpress.com/2013/01/1z5o1096.jpg?w=150" medium="image">
			<media:title type="html">Structure Data 2012: Scott Yara – SVP, Products and Co-Founder, Greenplum, a division of EMC</media:title>
		</media:content>

		<media:content url="http://0.gravatar.com/avatar/9e48ffa0913f65c577727457dd63023f?s=96&#38;d=retro&#38;r=PG" medium="image">
			<media:title type="html">dharrisstructure</media:title>
		</media:content>

		<media:content url="http://gigaom2.files.wordpress.com/2012/03/1z5o1154.jpg?w=300" medium="image">
			<media:title type="html">Rob Mee of Pivotal Labs, Scott Yara of EMC, and Om Malik of GigaOM at Structure:Data 2012</media:title>
		</media:content>

		<media:content url="http://gigaom2.files.wordpress.com/2013/02/hawq.jpg?w=708" medium="image">
			<media:title type="html">hawq</media:title>
		</media:content>

		<media:content url="http://gigaom2.files.wordpress.com/2013/02/structure-data_in-article-banners_300x2001.png" medium="image">
			<media:title type="html">Structure:Data: Put data to work. 60+ big data experts speaking. March 20-21, 2013, New York City. Register now.</media:title>
		</media:content>
	</item>
		<item>
		<title>VMware sharpens its focus &#8212; and its knife</title>
		<link>http://gigaom.com/2013/01/28/vmware-sharpens-its-focus-and-its-knife/</link>
		<comments>http://gigaom.com/2013/01/28/vmware-sharpens-its-focus-and-its-knife/#comments</comments>
		<pubDate>Mon, 28 Jan 2013 23:46:49 +0000</pubDate>
		<dc:creator>Barb Darrow</dc:creator>
				<category><![CDATA[Cloud Computing]]></category>
		<category><![CDATA[EMC]]></category>
		<category><![CDATA[Greenplum]]></category>
		<category><![CDATA[Intel]]></category>
		<category><![CDATA[Jonathan Chadwick]]></category>
		<category><![CDATA[Pat Gelsinger]]></category>
		<category><![CDATA[sliderocket]]></category>
		<category><![CDATA[VMWare]]></category>
		<category><![CDATA[Zimbra]]></category>

		<guid isPermaLink="false">http://gigaom.com/?p=605152</guid>
		<description><![CDATA[Hybrid cloud, software-defined data center, and end-user computing are all in. Other VMware efforts will be out in the upcoming year as the company seeks to regain focus. <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=605152&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
				<content:encoded><![CDATA[<p>VMware’s future is all about hybrid cloud, software-defined data center and end-user computing, and not so much about other things in its portfolio which will be “de-emphasized” this coming year, VMware CEO Pat Gelsinger told analysts on the company’s <a href="http://www.marketwatch.com/story/vmware-reports-record-fourth-quarter-and-full-year-2012-results-2013-01-28">fourth quarter earnings call</a> Monday night.</p>
<p><a href="http://gigaom.com/2013/01/28/vmware-sharpens-its-focus-and-its-knife/newvmwarelogo/" rel="attachment wp-att-605218"><img alt="new vmware logo" src="http://gigaom2.files.wordpress.com/2013/01/newvmwarelogo.jpg?w=300&#038;h=108" width="300" height="108" class="alignleft size-medium wp-image-605218"></a>“I’ve learned the importance of prioritization and execution,” said Gelsinger, an Intel and EMC veteran executive who came aboard as VMware CEO five months ago. “We will focus first on a portfolio rationalization around the products our customers care most about. Our decision to commit our cloud application efforts to Pivotal is an example. And we will realign resources as we scale back in some areas,” he said.</p>
<p>The <a href="http://gigaom.com/2012/12/04/and-whomp-here-it-is-the-pivotal-initiative-brought-to-you-by-vmware-and-emc/">Pivotal Initiative</a> — a spin off of VMware and EMC that will provide cloud and big data applications — will draw on VMware’s Cloud Foundry platform as a service, vFabric Java framework, and <a href="http://gigaom.com/2012/04/24/vmware-buys-big-data-startup-cetas/">Cetas</a> big data resources as well as Greenplum analytics and Pivotal Labs agile development expertise from EMC.</p>
<h2 id="key-to-vmware-focus-focus-focu">Key to VMware: Focus, focus, focus</h2>
<p>Gelsinger did not name areas that will be subject to cutbacks, but CFO Jonathan Chadwick said Sliderocket will be sidelined. VMware bought <a href="http://gigaom.com/2011/04/26/vmware-buys-sliderocket-in-a-race-to-provide-collaboration-2/">SlideRocket</a> and its slide presentation software in 2011. It was seen as a complement to the <a href="http://paidcontent.org/2010/05/11/419-yahoos-net-proceeds-from-sale-of-zimbra-100-million/?like=1">Zimbra productivity software</a> purchased earlier from Yahoo. Depending on how VMware defines end user computing — my guess is it means desktop virtualization rather than desktop apps — I’d expect more cuts in this area. SocialCast might also be de-emphasized.</p>
<p>The company will also cut 900 jobs. “VMware added 6,700 people over three years and we’ll continue to grow, invest and hire in support of our focus areas,” Gelsinger said. “We expect to close fiscal 2013 up 1,000 people.”</p>
<p>There was no information given about the Pivotal Initiative spin out but Gelsinger promised more discussion of that at a March 13 VMware EMC Strategy Summit in New York.</p>
<p>As for earnings, for its fourth quarter, VMware logged a profit of $206 million, or 47 cents per share, up from $200 million, or 46 cents per share, for the year-ago period.  Revenue grew 22 percent year-over-year to $1.29 billion with adjusted net income at 81 cents a share, beating estimates of 78 cents per share on revenue of $1.28 billion.</p>
<p>More to the point though was <a href="http://www.bloomberg.com/news/2013-01-28/vmware-sales-forecast-misses-estimates-as-demand-slumps.html">low guidance for the upcoming quarter.</a> VMware expects first quarter revenue to come in between $1.17 billion to $1.19 billion, short of consensus estimates of $1.25 billion and that spooked investors who drove the price down in extended hours trading.</p>
<h2 id="moving-on-from-server-virtuali">Moving on from server virtualization</h2>
<p>VMware is at a crossroads. It continues to lead the market in server virtualization but faces increasing competition there from Microsoft Hyper-V and open-source Xen and KVM alternatives. So it’s changing the conversation to network virtualization — a key underpinning of the aforementioned software-defined data center, according to VMware president Carl Eschenbach. VMware is counting on its <a href="http://gigaom.com/2012/07/23/vmware-to-buy-nicira-for-1-26b-in-a-strategic-leap-of-faith/">$1.26 billion buyout of Nicira </a>to give it a leg up in that network virtualization quest.</p>
<p><em>VMware CEO <a href="http://gigaom.com/2013/01/08/vmware-ceo-pat-gelsinger-to-speak-at-structure-2013/">Pat Gelsinger </a>will speak at <a href="http://event.gigaom.com/structure/?utm_source=cloud&amp;utm_medium=editorial&amp;utm_campaign=intext&amp;utm_term=605152+vmware-sharpens-its-focus-and-its-knife&amp;utm_content=gigabarb">GigaOM’s Structure 2013 conference</a> in San Francisco June 19-20.</em></p>
<br />  <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=605152&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" /><p><a href="http://pubads.g.doubleclick.net/gampad/jump?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=421449"><img src="http://pubads.g.doubleclick.net/gampad/ad?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=421449" /></a></p><p><strong>Related research and analysis from GigaOM Pro:</strong><br />Subscriber content. <a href="http://pro.gigaom.com/?utm_source=cloud&utm_medium=editorial&utm_campaign=auto3&utm_term=605152+vmware-sharpens-its-focus-and-its-knife&utm_content=gigabarb">Sign up for a free trial</a>.</p><ul><li><a href="http://pro.gigaom.com/2011/07/infrastructure-q2-big-data-and-paas-gain-more-momentum/?utm_source=cloud&utm_medium=editorial&utm_campaign=auto3&utm_term=605152+vmware-sharpens-its-focus-and-its-knife&utm_content=gigabarb">Infrastructure Q2: Big data and PaaS gain more momentum</a></li><li><a href="http://pro.gigaom.com/2010/10/ma-alive-and-well-in-q3/?utm_source=cloud&utm_medium=editorial&utm_campaign=auto3&utm_term=605152+vmware-sharpens-its-focus-and-its-knife&utm_content=gigabarb">In Q3, Big Data Meant Big Dollars</a></li><li><a href="http://pro.gigaom.com/2012/07/cloud-and-data-second-quarter-2012-analysis-and-outlook-2/?utm_source=cloud&utm_medium=editorial&utm_campaign=auto3&utm_term=605152+vmware-sharpens-its-focus-and-its-knife&utm_content=gigabarb">Takeaways from the second quarter in cloud and data</a></li></ul>]]></content:encoded>
			<wfw:commentRss>http://gigaom.com/2013/01/28/vmware-sharpens-its-focus-and-its-knife/feed/</wfw:commentRss>
		<slash:comments>1</slash:comments>
	
		<media:thumbnail url="http://gigaom2.files.wordpress.com/2012/08/patgelsinger-e1346170592458.jpg?w=150" />
		<media:content url="http://gigaom2.files.wordpress.com/2012/08/patgelsinger-e1346170592458.jpg?w=150" medium="image">
			<media:title type="html">Pat Gelsinger</media:title>
		</media:content>

		<media:content url="http://1.gravatar.com/avatar/4af03439988d64f816da72496325cb73?s=96&#38;d=retro&#38;r=PG" medium="image">
			<media:title type="html">gigabarb</media:title>
		</media:content>

		<media:content url="http://gigaom2.files.wordpress.com/2013/01/newvmwarelogo.jpg?w=300" medium="image">
			<media:title type="html">new vmware logo</media:title>
		</media:content>
	</item>
		<item>
		<title>Greenplum and Kaggle launch big data matchmaking service</title>
		<link>http://gigaom.com/2012/10/23/greenplum-kaggle-play-big-data-matchmakers/</link>
		<comments>http://gigaom.com/2012/10/23/greenplum-kaggle-play-big-data-matchmakers/#comments</comments>
		<pubDate>Tue, 23 Oct 2012 12:03:13 +0000</pubDate>
		<dc:creator>Barb Darrow</dc:creator>
				<category><![CDATA[big data]]></category>
		<category><![CDATA[EMC]]></category>
		<category><![CDATA[Gnip]]></category>
		<category><![CDATA[Greenplum]]></category>
		<category><![CDATA[kaggle]]></category>
		<category><![CDATA[tableau]]></category>
		<category><![CDATA[Twitter]]></category>

		<guid isPermaLink="false">http://gigaom.com/?p=576147</guid>
		<description><![CDATA[EMC's Greenplum division hopes to encourage users of its Chorus big data application to reach out to the Kaggle community of data scientists to do real-world work. The company also inked partnerships with Gnip and Tableau and open-sourced a version of Chorus.<img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=576147&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
				<content:encoded><![CDATA[<p>Sometimes it&#8217;s hard for data scientists and big data sets to find each other. That&#8217;s the problem that EMC&#8217;s Greenplum division and Kaggle are taking on with a new partnership. <a href="http://gigaom.com/data/can-kaggle-make-data-science-a-spectator-sport/">Kaggle</a> is a predictive modeling platform that sponsors competitions in which data scientists compete to solve big data problems.</p>
<p>Under the new alliance, Kaggle&#8217;s community of big data eggheads can use <a href="http://gigaom.com/cloud/emc-gets-it-big-data-needs-apps-too/">Greenplum&#8217;s Chorus big data application</a> to solve real-world problems.</p>
<p>&#8220;We&#8217;ve had good adoption of Chorus and companies&#8217; internal data workers are using it to do data science so they now have the tools, but honestly they don&#8217;t have all the people they need,&#8221; said Josh Klahr,  VP of product management for Greenplum. &#8220;Now you can search the Kaggle community based on rank, expertise, location and invite them to work on your challenge using Greenplum Chorus.&#8221;</p>
<h2>Playing Yenta to big data players</h2>
<p>Kaggle ranks its participants much the way the USTA ranks tennis players. And that community is growing fast &#8212; when Kaggle started fundraising in August, there were 11,000 members, now there are close to 60,000, said Anthony Goldbloom, CEO of Kaggle, who said this is the first such vendor partnership Kaggle has done. (<a href="http://gigaom.com/data/forget-your-fancy-data-science-try-overkill-analytics/">GigaOM has worked with Kaggle and Splunk </a>on the <a href="http://www.kaggle.com/c/predict-wordpress-likes">GigaOM WordPress Challenge: Splunk Innovation Prospect</a>.)</p>
<p>Also on the partner ecosystem front, Greenplum inked a deal that gives Chorus users access to <a href="http://gnip.com/">Gnip&#8217;s</a> historical Twitter feeds and will let Chorus users import Twitter streams into their Chorus sandbox for analysis. And finally, Chorus is partnering with <a href="http://www.tableau.com/">Tableau</a>, the popular analytics tool so that users can provision Tableau workbooks from their Chorus data sources.</p>
<p>Big data is one area where building a broad ecosystem of data providers is incredibly important. Putting good data scientists together with great data sets is incredibly important, said Ben Woo, managing director of research firm <a href="http://www.neuralytics.com/">Neuralytics</a>. &#8220;Big data is awfully short on the kinds of people who&#8217;ve done this work before and, frankly, people who give a damn. This sort of matchmaking is valuable.&#8221;</p>
<h2>Goal: Melding public and private data to spark new insights</h2>
<p>This convergence of publicly available &#8220;sentiment&#8221; data from sources like Twitter and internal business data lets data scientists ask interesting questions or find interesting questions to ask. For example, a pharmaceutical company has lots of its own data on a new drug. What it may not have is the sort of information about unforeseen side effects that might surface on Twitter or blogs after the drug is released.  &#8220;If you can match Twitter feeds and patient forums, you can find out unexpected things &#8212; see that maybe people are switching from your drug to another. Analyzing that discussion can be incredibly important,&#8221;  Goldbloom said.</p>
<p>In related news, Greenplum, as promised last spring, is open sourcing Chorus as the <a href="http://www.openchorus.org/">OpenChorus Project</a> under the Apache 2.0 license.</p>
<p><em><a title="Attribution License" href="http://creativecommons.org/licenses/by/2.0/">Feature photo courtesy of</a> Flickr user <a href="http://www.flickr.com/photos/kevinkrejci/">Kevin Krejci</a></em></p>
<br />  <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=576147&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" /><p><a href="http://pubads.g.doubleclick.net/gampad/jump?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=326724"><img src="http://pubads.g.doubleclick.net/gampad/ad?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=326724" /></a></p><p><strong>Related research and analysis from GigaOM Pro:</strong><br />Subscriber content. <a href="http://pro.gigaom.com/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=576147+greenplum-kaggle-play-big-data-matchmakers&utm_content=gigabarb">Sign up for a free trial</a>.</p><ul><li><a href="http://pro.gigaom.com/2012/01/12-tech-leaders-resolutions-for-2012/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=576147+greenplum-kaggle-play-big-data-matchmakers&utm_content=gigabarb">12 tech leaders’ resolutions for 2012</a></li><li><a href="http://pro.gigaom.com/2012/03/a-near-term-outlook-for-big-data/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=576147+greenplum-kaggle-play-big-data-matchmakers&utm_content=gigabarb">A near-term outlook for big data</a></li><li><a href="http://pro.gigaom.com/2012/05/the-importance-of-putting-the-u-and-i-in-visualization/?utm_source=data&utm_medium=editorial&utm_campaign=auto3&utm_term=576147+greenplum-kaggle-play-big-data-matchmakers&utm_content=gigabarb">The importance of putting the U and I in visualization</a></li></ul>]]></content:encoded>
			<wfw:commentRss>http://gigaom.com/2012/10/23/greenplum-kaggle-play-big-data-matchmakers/feed/</wfw:commentRss>
		<slash:comments>0</slash:comments>
	
		<media:thumbnail url="http://gigaom2.files.wordpress.com/2012/10/6259499293_b577b94cfd_z.jpg?w=150" />
		<media:content url="http://gigaom2.files.wordpress.com/2012/10/6259499293_b577b94cfd_z.jpg?w=150" medium="image">
			<media:title type="html">big data</media:title>
		</media:content>

		<media:content url="http://1.gravatar.com/avatar/4af03439988d64f816da72496325cb73?s=96&#38;d=retro&#38;r=PG" medium="image">
			<media:title type="html">gigabarb</media:title>
		</media:content>
	</item>
		<item>
		<title>6 things we need to know from VMware</title>
		<link>http://gigaom.com/2012/08/24/6-things-we-need-to-know-from-vmware/</link>
		<comments>http://gigaom.com/2012/08/24/6-things-we-need-to-know-from-vmware/#comments</comments>
		<pubDate>Fri, 24 Aug 2012 12:49:44 +0000</pubDate>
		<dc:creator>Barb Darrow</dc:creator>
				<category><![CDATA[Amazon]]></category>
		<category><![CDATA[Diane Greene]]></category>
		<category><![CDATA[EMC]]></category>
		<category><![CDATA[Greenplum]]></category>
		<category><![CDATA[joe tucci]]></category>
		<category><![CDATA[Microsoft]]></category>
		<category><![CDATA[Pat Gelsinger]]></category>
		<category><![CDATA[Paul Maritz]]></category>
		<category><![CDATA[VMWare]]></category>
		<category><![CDATA[VMworld]]></category>

		<guid isPermaLink="false">http://gigaom.com/?p=555304</guid>
		<description><![CDATA[As VMware transitions from CEO Paul Maritz to CEO Pat Gelsinger and keeps pushing beyond its server virtualization roots, there are a lot of questions about where the company is headed. Here are 6 key issues the company should address at VMworld. <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=555304&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" />]]></description>
				<content:encoded><![CDATA[<div id="attachment_543880" class="wp-caption alignright" style="width: 196px"><a href="http://gigaom.com/cloud/how-new-look-vmware-should-attack-the-emerging-cloud-landscape/patgelsinger/" rel="attachment wp-att-543880"><img  title="patgelsinger" src="http://gigaom2.files.wordpress.com/2012/07/patgelsinger-e1342569814192.jpg?w=186&#038;h=140" alt="" width="186" height="140" class="size-thumbnail wp-image-543880" /></a><p class="wp-caption-text">VMware&#8217;s new CEO Pat Gelsinger.</p></div>
<p>VMware&#8217;s annual <a href="http://www.vmworld.com/community/conference/us?src=PaidSearch_Google_VMworld_US_EN">VMworld</a> shindig is next week, giving the company a golden opportunity to answer a lot of questions about its future and its future products. Here are six topics the company needs to address at the event where incoming CEO Pat Gelsinger and outgoing CEO Paul Maritz will both keynote.</p>
<p><strong>1: Settle the spin out question</strong></p>
<p>Will VMware spin out its <a href="http://gigaom.com/cloud/cloudfoundry-attacks-google-style-problem-with-bosh/">Cloud Foundry</a> platform as a service and other cloud assets or keep them in house? As <em>GigaOM</em> reported last month, <a href="http://gigaom.com/cloud/vmware-plans-cloud-spin-out-to-keep-up-with-microsoft-amazon-and-google/">a spinout was under consideration </a>as a way to help the company become a bigger player in cloud computing. Packaged together, the open-source Cloud Foundry PaaS, EMC&#8217;s Greenplum data analysis expertise and an <a href="http://gigaom.com/cloud/well-played-emc">infrastructure as a platform play </a>&#8211; all of that could make a cloud foundation that could take on rivals Amazon, Microsoft and Google.</p>
<p><strong>2: Clarify the software-defined data center product vision</strong></p>
<p>Folks want to hear VMware CTO Steve Herrod talk more about the company&#8217;s notion of a<a href="http://gigaom.com/cloud/vmware-the-software-defined-data-center-is-coming/"> software defined data center,</a> in which software and virtualization assume roles long-held by specialized hardware. <a href="http://gigaom.com/cloud/vmware-to-buy-nicira-for-1-26b-in-a-strategic-leap-of-faith/">VMware&#8217;s acquisition of Nicira </a>and its software-defined networking (SDN) prowess will play a big role there and will be the subject of much debate, although the deal is still in the works.</p>
<p><strong>3: Show the world who&#8217;s in charge</strong></p>
<p>One question that has dogged VMware since <a href="http://www.vmware.com/company/news/releases/emc.html">EMC bought it</a> in 2003 is: Who is running the show? When VMware said last month that Maritz (pictured below) would move over to parent company EMC as chief strategist and would be replaced by Gelsinger, president of EMC&#8217;s Information Infrastructure Products group, reaction was all over the map. People had many theories such as:</p>
<ul>
<li>A. This was a move by EMC CEO Joe Tucci to tie VMware more tightly to the mother ship.</li>
<li>B. Maritz was tired of day-to-day management.</li>
<li>C. Gelsinger had to be appeased after Tucci said he would stay on as EMC CEO at least through the end of 2013.</li>
<li>D: All of the above.</li>
<li>E: None of the above.</li>
</ul>
<p><a href="http://gigaom.com/broadband/vmwares-preparing-for-the-post-document-era/1z5o2616/" rel="attachment wp-att-366330"><img  title="Paul Maritz - CEO, VMware - Structure 2011" src="http://gigaom2.files.wordpress.com/2011/06/1z5o2616-e1314715569446.jpg?w=300&#038;h=200" alt="Paul Maritz - CEO, VMware - Structure 2011" width="300" height="200" class="alignleft size-medium wp-image-366330" /></a>VMware now needs to put these theories to rest with a statement of what Tucci&#8217;s role is and how he helps lead the company&#8217;s vision.</p>
<p><strong>4: Lose the &#8220;memory tax.&#8221; For real, and once and for all</strong></p>
<p>VMware infuriated customers last year with vSphere 5 licensing changes that amounted to a price hike (many dubbed it a <a href="http://www.theregister.co.uk/2011/07/13/vmware_esxi_5_0_analysis/">&#8220;memory tax.&#8221;</a>) With that release, VMware started charging a fee for the use of vSphere on every socket of a physical server and another fee on the amount of virtual memory used by the hypervisor. Now it&#8217;s been reported that <a href="http://searchservervirtualization.techtarget.com/news/2240161773/VMware-pros-hail-death-knell-for-vRAM">VMware will drop the practice</a>.</p>
<p>That single licensing move prompted many VMware shops to at least look at <a href="http://gigaom.com/cloud/hypervisor-bout-rages-on-no-end-in-sight/">Microsoft Hyper-V</a> or maybe XEN or KVM server virtualization alternatives. VMware needs to clean this up.</p>
<p><strong>5: Address fear of a brain drain</strong></p>
<p>What&#8217;s VMware doing to keep and recruit the best technical talent? As GigaOM&#8217;s Derrick Harris reported, a lot of engineers left VMware in the past year. Most recently Cloud Foundry luminary <a href="http://gigaom.com/cloud/cloud-foundry-loses-another-exec-as-david-mccrory-exits/">Dave McCrory</a> took a new job at Warner Music Group. This perception of brain drain worries people like Bart Copeland, CEO of ActiveState, a Cloud Foundry partner, who wonders if VMware is doing enough to find new senior engineers to replace those who have left.  Whether it&#8217;s accurate or not, there is a perception that VMware is bleeding tech talent.</p>
<p><strong>6:  Set priorities </strong></p>
<p>Everyone knows VMware wants to be more than a server virtualization vendor, but despite lots of acquisitions &#8212; <a href="http://gigaom.com/2009/08/10/vmware-to-buy-springsource-for-420m/">SpringSource</a>, Zimbra, etc. &#8212; its core strength remains squarely what it&#8217;s always been: server virtualization. What does VMware want to be? And how will it pursue that expanded agenda without <a href="http://gigaom.com/cloud/vmware-seeking-scale-took-its-eye-off-the-ball/">damaging or neglecting its core server virtualization franchise?</a></p>
<p>Gelsinger, who spent years at Intel before joining EMC, is viewed as a hardware guy, and many wonder how he will manage what is pretty clearly a software company.   They would like to know what VMware&#8217;s target market really is and who its primary competition is. Microsoft? Google? IBM? HP?  Positioning is important. So let&#8217;s hear about it.</p>
<br />  <img alt="" border="0" src="http://stats.wordpress.com/b.gif?host=gigaom.com&#038;blog=14960843&#038;post=555304&#038;subd=gigaom2&#038;ref=&#038;feed=1" width="1" height="1" /><p><a href="http://pubads.g.doubleclick.net/gampad/jump?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=199220"><img src="http://pubads.g.doubleclick.net/gampad/ad?iu=/1008864/GigaOM_RSS_300x250&#038;sz=300x250&#038;c=199220" /></a></p><p><strong>Related research and analysis from GigaOM Pro:</strong><br />Subscriber content. <a href="http://pro.gigaom.com/?utm_source=cloud&utm_medium=editorial&utm_campaign=auto3&utm_term=555304+6-things-we-need-to-know-from-vmware&utm_content=gigabarb">Sign up for a free trial</a>.</p><ul><li><a href="http://pro.gigaom.com/2012/04/infrastructure-q1-cloud-and-big-data-woo-the-enterprise/?utm_source=cloud&utm_medium=editorial&utm_campaign=auto3&utm_term=555304+6-things-we-need-to-know-from-vmware&utm_content=gigabarb">Infrastructure Q1: Cloud and big data woo enterprises</a></li><li><a href="http://pro.gigaom.com/2012/03/a-near-term-outlook-for-big-data/?utm_source=cloud&utm_medium=editorial&utm_campaign=auto3&utm_term=555304+6-things-we-need-to-know-from-vmware&utm_content=gigabarb">A near-term outlook for big data</a></li><li><a href="http://pro.gigaom.com/2010/10/ma-alive-and-well-in-q3/?utm_source=cloud&utm_medium=editorial&utm_campaign=auto3&utm_term=555304+6-things-we-need-to-know-from-vmware&utm_content=gigabarb">In Q3, Big Data Meant Big Dollars</a></li></ul>]]></content:encoded>
			<wfw:commentRss>http://gigaom.com/2012/08/24/6-things-we-need-to-know-from-vmware/feed/</wfw:commentRss>
		<slash:comments>2</slash:comments>
	
		<media:thumbnail url="http://gigaom2.files.wordpress.com/2012/07/balloons.jpg?w=150" />
		<media:content url="http://gigaom2.files.wordpress.com/2012/07/balloons.jpg?w=150" medium="image">
			<media:title type="html">Google, VMware, Microsoft, and Amazon cloud</media:title>
		</media:content>

		<media:content url="http://1.gravatar.com/avatar/4af03439988d64f816da72496325cb73?s=96&#38;d=retro&#38;r=PG" medium="image">
			<media:title type="html">gigabarb</media:title>
		</media:content>

		<media:content url="http://gigaom2.files.wordpress.com/2012/07/patgelsinger-e1342569814192.jpg?w=186" medium="image">
			<media:title type="html">patgelsinger</media:title>
		</media:content>

		<media:content url="http://gigaom2.files.wordpress.com/2011/06/1z5o2616-e1314715569446.jpg?w=300" medium="image">
			<media:title type="html">Paul Maritz - CEO, VMware - Structure 2011</media:title>
		</media:content>
	</item>
	</channel>
</rss>
