add ostep homeworks
This commit is contained in:
447
related_info/ostep/ostep16-raid.py
Executable file
447
related_info/ostep/ostep16-raid.py
Executable file
@@ -0,0 +1,447 @@
|
||||
#! /usr/bin/env python
|
||||
|
||||
import math
|
||||
import random
|
||||
from optparse import OptionParser
|
||||
|
||||
# minimum unit of transfer to RAID
|
||||
BLOCKSIZE = 4096
|
||||
|
||||
def convert(size):
|
||||
length = len(size)
|
||||
lastchar = size[length-1]
|
||||
if (lastchar == 'k') or (lastchar == 'K'):
|
||||
m = 1024
|
||||
nsize = int(size[0:length-1]) * m
|
||||
elif (lastchar == 'm') or (lastchar == 'M'):
|
||||
m = 1024*1024
|
||||
nsize = int(size[0:length-1]) * m
|
||||
elif (lastchar == 'g') or (lastchar == 'G'):
|
||||
m = 1024*1024*1024
|
||||
nsize = int(size[0:length-1]) * m
|
||||
else:
|
||||
nsize = int(size)
|
||||
return nsize
|
||||
|
||||
class disk:
|
||||
def __init__(self, seekTime=10, xferTime=0.1, queueLen=8):
|
||||
# these are both in milliseconds
|
||||
# seek is the time to seek (simple constant amount)
|
||||
# transfer is the time to read one block
|
||||
self.seekTime = seekTime
|
||||
self.xferTime = xferTime
|
||||
|
||||
# length of scheduling queue
|
||||
self.queueLen = queueLen
|
||||
|
||||
# current location: make it negative so that whatever
|
||||
# the first read is, it causes a seek
|
||||
self.currAddr = -10000
|
||||
|
||||
# queue
|
||||
self.queue = []
|
||||
|
||||
# disk geometry
|
||||
self.numTracks = 100
|
||||
self.blocksPerTrack = 100
|
||||
self.blocksPerDisk = self.numTracks * self.blocksPerTrack
|
||||
|
||||
# stats
|
||||
self.countIO = 0
|
||||
self.countSeq = 0
|
||||
self.countNseq = 0
|
||||
self.countRand = 0
|
||||
self.utilTime = 0
|
||||
|
||||
def stats(self):
|
||||
return (self.countIO, self.countSeq, self.countNseq, self.countRand, self.utilTime)
|
||||
|
||||
def enqueue(self, addr):
|
||||
assert(addr < self.blocksPerDisk)
|
||||
self.countIO += 1
|
||||
|
||||
# check if this is on the same track, or a different one
|
||||
currTrack = self.currAddr / self.numTracks
|
||||
newTrack = addr / self.numTracks
|
||||
|
||||
# absolute diff
|
||||
diff = addr - self.currAddr
|
||||
|
||||
# if on the same track...
|
||||
if currTrack == newTrack or diff < self.blocksPerTrack:
|
||||
if diff == 1:
|
||||
self.countSeq += 1
|
||||
else:
|
||||
self.countNseq += 1
|
||||
self.utilTime += (diff * self.xferTime)
|
||||
else:
|
||||
self.countRand += 1
|
||||
self.utilTime += (self.seekTime + self.xferTime)
|
||||
self.currAddr = addr
|
||||
|
||||
def go(self):
|
||||
return self.utilTime
|
||||
|
||||
class raid:
|
||||
def __init__(self, chunkSize='4k', numDisks=4, level=0, timing=False, reverse=False, solve=False, raid5type='LS'):
|
||||
chunkSize = int(convert(chunkSize))
|
||||
self.chunkSize = chunkSize / BLOCKSIZE
|
||||
self.numDisks = numDisks
|
||||
self.raidLevel = level
|
||||
self.timing = timing
|
||||
self.reverse = reverse
|
||||
self.solve = solve
|
||||
self.raid5type = raid5type
|
||||
|
||||
if (chunkSize % BLOCKSIZE) != 0:
|
||||
print 'chunksize (%d) must be multiple of blocksize (%d): %d' % (chunkSize, BLOCKSIZE, self.chunkSize % BLOCKSIZE)
|
||||
exit(1)
|
||||
if self.raidLevel == 1 and numDisks % 2 != 0:
|
||||
print 'raid1: disks (%d) must be a multiple of two' % numDisks
|
||||
exit(1)
|
||||
|
||||
if self.raidLevel == 4:
|
||||
self.blocksInStripe = (self.numDisks - 1) * self.chunkSize
|
||||
self.pdisk = self.numDisks - 1
|
||||
if self.raidLevel == 5:
|
||||
self.blocksInStripe = (self.numDisks - 1) * self.chunkSize
|
||||
self.pdisk = -1
|
||||
|
||||
self.disks = []
|
||||
for i in range(self.numDisks):
|
||||
self.disks.append(disk())
|
||||
|
||||
# print per-disk stats
|
||||
def stats(self, totalTime):
|
||||
for d in range(self.numDisks):
|
||||
s = self.disks[d].stats()
|
||||
if s[4] == totalTime:
|
||||
print 'disk:%d busy: %.2f I/Os: %5d (sequential:%d nearly:%d random:%d)' % (d, (100.0*float(s[4])/totalTime), s[0], s[1], s[2], s[3])
|
||||
elif s[4] == 0:
|
||||
print 'disk:%d busy: %.2f I/Os: %5d (sequential:%d nearly:%d random:%d)' % (d, (100.0*float(s[4])/totalTime), s[0], s[1], s[2], s[3])
|
||||
else:
|
||||
print 'disk:%d busy: %.2f I/Os: %5d (sequential:%d nearly:%d random:%d)' % (d, (100.0*float(s[4])/totalTime), s[0], s[1], s[2], s[3])
|
||||
|
||||
# global enqueue function
|
||||
def enqueue(self, addr, size, isWrite):
|
||||
# should we print out the logical operation?
|
||||
if self.timing == False:
|
||||
if self.solve or self.reverse==False:
|
||||
if isWrite:
|
||||
print 'LOGICAL WRITE to addr:%d size:%d' % (addr, size * BLOCKSIZE)
|
||||
else:
|
||||
print 'LOGICAL READ from addr:%d size:%d' % (addr, size * BLOCKSIZE)
|
||||
if self.solve == False:
|
||||
print ' Physical reads/writes?\n'
|
||||
else:
|
||||
print 'LOGICAL OPERATION is ?'
|
||||
|
||||
# should we print out the physical operations?
|
||||
if self.timing == False and (self.solve or self.reverse==True):
|
||||
self.printPhysical = True
|
||||
else:
|
||||
self.printPhysical = False
|
||||
|
||||
if self.raidLevel == 0:
|
||||
self.enqueue0(addr, size, isWrite)
|
||||
elif self.raidLevel == 1:
|
||||
self.enqueue1(addr, size, isWrite)
|
||||
elif self.raidLevel == 4 or self.raidLevel == 5:
|
||||
self.enqueue45(addr, size, isWrite)
|
||||
|
||||
# process disk workloads one at a time, returning final completion time
|
||||
def go(self):
|
||||
tmax = 0
|
||||
for d in range(self.numDisks):
|
||||
# print '**** disk ****', d
|
||||
t = self.disks[d].go()
|
||||
if t > tmax:
|
||||
tmax = t
|
||||
return tmax
|
||||
|
||||
# helper functions
|
||||
def doSingleRead(self, disk, off, doNewline=False):
|
||||
if self.printPhysical:
|
||||
print ' read [disk %d, offset %d] ' % (disk, off),
|
||||
if doNewline:
|
||||
print ''
|
||||
self.disks[disk].enqueue(off)
|
||||
|
||||
def doSingleWrite(self, disk, off, doNewline=False):
|
||||
if self.printPhysical:
|
||||
print ' write [disk %d, offset %d] ' % (disk, off),
|
||||
if doNewline:
|
||||
print ''
|
||||
self.disks[disk].enqueue(off)
|
||||
|
||||
#
|
||||
# mapping for RAID 0 (striping)
|
||||
#
|
||||
def bmap0(self, bnum):
|
||||
cnum = bnum / self.chunkSize
|
||||
coff = bnum % self.chunkSize
|
||||
return (cnum % self.numDisks, (cnum / self.numDisks) * self.chunkSize + coff)
|
||||
|
||||
def enqueue0(self, addr, size, isWrite):
|
||||
# can ignore isWrite, as I/O pattern is the same for striping
|
||||
for b in range(addr, addr+size):
|
||||
(disk, off) = self.bmap0(b)
|
||||
if isWrite:
|
||||
self.doSingleWrite(disk, off, True)
|
||||
else:
|
||||
self.doSingleRead(disk, off, True)
|
||||
if self.timing == False and self.printPhysical:
|
||||
print ''
|
||||
|
||||
#
|
||||
# mapping for RAID 1 (mirroring)
|
||||
#
|
||||
def bmap1(self, bnum):
|
||||
cnum = bnum / self.chunkSize
|
||||
coff = bnum % self.chunkSize
|
||||
disk = 2 * (cnum % (self.numDisks / 2))
|
||||
return (disk, disk + 1, (cnum / (self.numDisks / 2)) * self.chunkSize + coff)
|
||||
|
||||
def enqueue1(self, addr, size, isWrite):
|
||||
for b in range(addr, addr+size):
|
||||
(disk1, disk2, off) = self.bmap1(b)
|
||||
# print 'enqueue:', addr, size, '-->', m
|
||||
if isWrite:
|
||||
self.doSingleWrite(disk1, off, False)
|
||||
self.doSingleWrite(disk2, off, True)
|
||||
else:
|
||||
# the raid-1 read balancing algorithm is here;
|
||||
# could be something more intelligent --
|
||||
# instead, it is just based on the disk offset
|
||||
# to produce something easily reproducible
|
||||
if off % 2 == 0:
|
||||
self.doSingleRead(disk1, off, True)
|
||||
else:
|
||||
self.doSingleRead(disk2, off, True)
|
||||
if self.timing == False and self.printPhysical:
|
||||
print ''
|
||||
|
||||
#
|
||||
# mapping for RAID 4 (parity disk)
|
||||
#
|
||||
# assumes (for now) that there is just one parity disk
|
||||
#
|
||||
def bmap4(self, bnum):
|
||||
cnum = bnum / self.chunkSize
|
||||
coff = bnum % self.chunkSize
|
||||
return (cnum % (self.numDisks - 1), (cnum / (self.numDisks - 1)) * self.chunkSize + coff)
|
||||
|
||||
def pmap4(self, snum):
|
||||
return self.pdisk
|
||||
|
||||
#
|
||||
# mapping for RAID 5 (rotated parity)
|
||||
#
|
||||
def __bmap5(self, bnum):
|
||||
cnum = bnum / self.chunkSize
|
||||
coff = bnum % self.chunkSize
|
||||
ddsk = cnum / (self.numDisks - 1)
|
||||
doff = (ddsk * self.chunkSize) + coff
|
||||
disk = cnum % (self.numDisks - 1)
|
||||
col = (ddsk % self.numDisks)
|
||||
pdsk = (self.numDisks - 1) - col
|
||||
|
||||
# supports left-asymmetric and left-symmetric layouts
|
||||
if self.raid5type == 'LA':
|
||||
if disk >= pdisk:
|
||||
disk += 1
|
||||
elif self.raid5type == 'LS':
|
||||
disk = (disk - col) % (self.numDisks)
|
||||
else:
|
||||
print 'error: no such RAID scheme'
|
||||
exit(1)
|
||||
assert(disk != pdsk)
|
||||
return (disk, pdsk, doff)
|
||||
|
||||
# yes this is lame (redundant call to __bmap5 is serious programmer laziness)
|
||||
def bmap5(self, bnum):
|
||||
(disk, pdisk, off) = self.__bmap5(bnum)
|
||||
return (disk, off)
|
||||
|
||||
# this too is lame (redundant call to __bmap5 is serious programmer laziness)
|
||||
def pmap5(self, snum):
|
||||
(disk, pdisk, off) = self.__bmap5(snum * self.blocksInStripe)
|
||||
return pdisk
|
||||
|
||||
# RAID 4/5 helper routine to write out some blocks in a stripe
|
||||
def doPartialWrite(self, stripe, begin, end, bmap, pmap):
|
||||
numWrites = end - begin
|
||||
pdisk = pmap(stripe)
|
||||
if (numWrites + 1) <= (self.blocksInStripe - numWrites):
|
||||
# SUBTRACTIVE PARITY
|
||||
# print 'SUBTRACTIVE'
|
||||
offList = []
|
||||
for voff in range(begin, end):
|
||||
(disk, off) = bmap(voff)
|
||||
self.doSingleRead(disk, off)
|
||||
if off not in offList:
|
||||
offList.append(off)
|
||||
for i in range(len(offList)):
|
||||
self.doSingleRead(pdisk, offList[i], i == (len(offList) - 1))
|
||||
else:
|
||||
# ADDITIVE PARITY
|
||||
# print 'ADDITIVE'
|
||||
stripeBegin = stripe * self.blocksInStripe
|
||||
stripeEnd = stripeBegin + self.blocksInStripe
|
||||
for voff in range(stripeBegin, begin):
|
||||
(disk, off) = bmap(voff)
|
||||
self.doSingleRead(disk, off, (voff == (begin - 1)) and (end == stripeEnd))
|
||||
for voff in range(end, stripeEnd):
|
||||
(disk, off) = bmap(voff)
|
||||
self.doSingleRead(disk, off, voff == (stripeEnd - 1))
|
||||
|
||||
# WRITES: same for additive or subtractive parity
|
||||
offList = []
|
||||
for voff in range(begin, end):
|
||||
(disk, off) = bmap(voff)
|
||||
self.doSingleWrite(disk, off)
|
||||
if off not in offList:
|
||||
offList.append(off)
|
||||
for i in range(len(offList)):
|
||||
self.doSingleWrite(pdisk, offList[i], i == (len(offList) - 1))
|
||||
|
||||
# RAID 4/5 enqueue routine
|
||||
def enqueue45(self, addr, size, isWrite):
|
||||
if self.raidLevel == 4:
|
||||
(bmap, pmap) = (self.bmap4, self.pmap4)
|
||||
elif self.raidLevel == 5:
|
||||
(bmap, pmap) = (self.bmap5, self.pmap5)
|
||||
|
||||
if isWrite == False:
|
||||
for b in range(addr, addr+size):
|
||||
(disk, off) = bmap(b)
|
||||
self.doSingleRead(disk, off)
|
||||
else:
|
||||
# process the write request, one stripe at a time
|
||||
initStripe = (addr) / self.blocksInStripe
|
||||
finalStripe = (addr + size - 1) / self.blocksInStripe
|
||||
|
||||
left = size
|
||||
begin = addr
|
||||
for stripe in range(initStripe, finalStripe + 1):
|
||||
endOfStripe = (stripe * self.blocksInStripe) + self.blocksInStripe
|
||||
|
||||
if left >= self.blocksInStripe:
|
||||
end = begin + self.blocksInStripe
|
||||
else:
|
||||
end = begin + left
|
||||
|
||||
if end >= endOfStripe:
|
||||
end = endOfStripe
|
||||
|
||||
self.doPartialWrite(stripe, begin, end, bmap, pmap)
|
||||
|
||||
left -= (end - begin)
|
||||
begin = end
|
||||
|
||||
# for all cases, print this for pretty-ness in mapping mode
|
||||
if self.timing == False and self.printPhysical:
|
||||
print ''
|
||||
|
||||
#
|
||||
# main program
|
||||
#
|
||||
parser = OptionParser()
|
||||
|
||||
parser.add_option('-s', '--seed', default=0, help='the random seed', action='store', type='int', dest='seed')
|
||||
parser.add_option('-D', '--numDisks', default=4, help='number of disks in RAID', action='store', type='int', dest='numDisks')
|
||||
parser.add_option('-C', '--chunkSize', default='4k', help='chunk size of the RAID', action='store', type='string', dest='chunkSize')
|
||||
parser.add_option('-n', '--numRequests', default=10, help='number of requests to simulate', action='store', type='int', dest='numRequests')
|
||||
parser.add_option('-S', '--reqSize', default='4k', help='size of requests', action='store', type='string', dest='size')
|
||||
parser.add_option('-W', '--workload', default='rand', help='either "rand" or "seq" workloads', action='store', type='string', dest='workload')
|
||||
parser.add_option('-w', '--writeFrac', default=0, help='write fraction (100->all writes, 0->all reads)', action='store', type='int', dest='writeFrac')
|
||||
parser.add_option('-R', '--randRange', default=10000, help='range of requests (when using "rand" workload)', action='store', type='int', dest='range')
|
||||
parser.add_option('-L', '--level', default=0, help='RAID level (0, 1, 4, 5)', action='store', type='int', dest='level')
|
||||
parser.add_option('-5', '--raid5', default='LS', help='RAID-5 left-symmetric "LS" or left-asym "LA"', action='store', type='string', dest='raid5type')
|
||||
parser.add_option('-r', '--reverse', default=False, help='instead of showing logical ops, show physical', action='store_true', dest='reverse')
|
||||
parser.add_option('-t', '--timing', default=False, help='use timing mode, instead of mapping mode', action='store_true', dest='timing')
|
||||
parser.add_option('-c', '--compute', default=False, help='compute answers for me', action='store_true', dest='solve')
|
||||
|
||||
(options, args) = parser.parse_args()
|
||||
|
||||
print 'ARG blockSize', BLOCKSIZE
|
||||
print 'ARG seed', options.seed
|
||||
print 'ARG numDisks', options.numDisks
|
||||
print 'ARG chunkSize', options.chunkSize
|
||||
print 'ARG numRequests', options.numRequests
|
||||
print 'ARG reqSize', options.size
|
||||
print 'ARG workload', options.workload
|
||||
print 'ARG writeFrac', options.writeFrac
|
||||
print 'ARG randRange', options.range
|
||||
print 'ARG level', options.level
|
||||
print 'ARG raid5', options.raid5type
|
||||
print 'ARG reverse', options.reverse
|
||||
print 'ARG timing', options.timing
|
||||
|
||||
print ''
|
||||
|
||||
writeFrac = float(options.writeFrac) / 100.0
|
||||
assert(writeFrac >= 0.0 and writeFrac <= 1.0)
|
||||
|
||||
random.seed(options.seed)
|
||||
|
||||
size = convert(options.size)
|
||||
if size % BLOCKSIZE != 0:
|
||||
print 'error: request size (%d) must be a multiple of BLOCKSIZE (%d)' % (size, BLOCKSIZE)
|
||||
exit(1)
|
||||
size = size / BLOCKSIZE
|
||||
|
||||
if options.workload == 'seq' or options.workload == 's' or options.workload == 'sequential':
|
||||
workloadIsSequential = True
|
||||
elif options.workload == 'rand' or options.workload == 'r' or options.workload == 'random':
|
||||
workloadIsSequential = False
|
||||
else:
|
||||
print 'error: workload must be either r/rand/random or s/seq/sequential'
|
||||
exit(1)
|
||||
|
||||
assert(options.level == 0 or options.level == 1 or options.level == 4 or options.level == 5)
|
||||
if options.level != 0 and options.numDisks < 2:
|
||||
print 'RAID-4 and RAID-5 need more than 1 disk'
|
||||
exit(1)
|
||||
|
||||
if options.level == 5 and options.raid5type != 'LA' and options.raid5type != 'LS':
|
||||
print 'Only two types of RAID-5 supported: left-asymmetric (LA) and left-symmetric (LS) (%s is not)' % options.raid5type
|
||||
exit(1)
|
||||
|
||||
# instantiate RAID
|
||||
r = raid(chunkSize=options.chunkSize, numDisks=options.numDisks, level=options.level, timing=options.timing,
|
||||
reverse=options.reverse, solve=options.solve, raid5type=options.raid5type)
|
||||
|
||||
# generate requests
|
||||
off = 0
|
||||
for i in range(options.numRequests):
|
||||
if workloadIsSequential == True:
|
||||
blk = off
|
||||
off += size
|
||||
else:
|
||||
blk = int(random.random() * options.range)
|
||||
if random.random() < writeFrac:
|
||||
r.enqueue(blk, size, True)
|
||||
else:
|
||||
r.enqueue(blk, size, False)
|
||||
|
||||
# process requests
|
||||
t = r.go()
|
||||
|
||||
# print out some final info, if needed
|
||||
if options.timing == False:
|
||||
print ''
|
||||
exit(0)
|
||||
|
||||
if options.solve:
|
||||
print ''
|
||||
r.stats(t)
|
||||
print ''
|
||||
print 'STAT totalTime', t
|
||||
print ''
|
||||
else:
|
||||
print ''
|
||||
print 'Estimate how long the workload should take to complete.'
|
||||
print '- Roughly how many requests should each disk receive?'
|
||||
print '- How many requests are random, how many sequential?'
|
||||
print ''
|
||||
Reference in New Issue
Block a user