Skip to content

Commit

Permalink
Merge pull request #27 from svenkreiss/parallelize-partitions
Browse files Browse the repository at this point in the history
factor out _parallelize_partitions()
  • Loading branch information
svenkreiss committed Jan 29, 2016
2 parents 94aca17 + 0397ead commit 0c8ab1a
Showing 1 changed file with 17 additions and 2 deletions.
19 changes: 17 additions & 2 deletions pysparkling/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,24 @@ def partitioned():
end = int((i+1) * len_x/numPartitions)
if i+1 == numPartitions:
end += 1
yield Partition(itertools.islice(x, end-start), i)
yield itertools.islice(x, end-start)

return RDD(partitioned(), self)
return self._parallelize_partitions(partitioned())

def _parallelize_partitions(self, partitions):
"""
:param partitions:
An iterable over the partitioned data.
:returns:
New RDD.
"""

return RDD(
(Partition(p_data, i) for i, p_data in enumerate(partitions)),
self,
)

def pickleFile(self, name, minPartitions=None):
"""
Expand Down

0 comments on commit 0c8ab1a

Please sign in to comment.