Merge branch 'llrt'

This commit is contained in:
Siu Kwan Lam 2013-08-14 17:45:58 -05:00
commit 5f01343bba
19 changed files with 1309 additions and 6 deletions

22
llrtc/Makefile Normal file
View file

@ -0,0 +1,22 @@
# Top-level Makefile for LLRT: every target is delegated to lib/.
# `install` builds the LLVM IR and stages it where the llvm package
# expects it (../llvm/llrt).

.PHONY: all ir test clean-test clean-temp clean install

all:
	$(MAKE) -C lib

ir:
	$(MAKE) -C lib ir

test:
	$(MAKE) -C lib test

clean-test:
	$(MAKE) -C lib clean-test

clean-temp:
	$(MAKE) -C lib clean-temp

clean:
	$(MAKE) -C lib clean

# Build the IR, copy it into the llvm package tree, then drop temporaries.
install: ir
	cp llrt_*.ll ../llvm/llrt
	$(MAKE) -C lib clean-temp

25
llrtc/README.md Normal file
View file

@ -0,0 +1,25 @@
# LLRT: Low Level Runtime
## Why?
The same reason for LLVM compiler-rt. LLVM generates libgcc symbols, such as
__divdi3 for 64-bit division on 32-bit platforms. These are not always available.
We need to ship compiler-rt but it is not Windows ready.
This subproject aims to provide a small portable subset of compiler-rt.
Start small and add only the things we really needed.
Performance is not crucial but should not be terrible.
Functionality and usefulness should be more important than performance.
## Developer Instructions
LLRT implements some functionalities in compiler-rt in ANSI C.
The C files are compiled using clang to produce LLVM IR which are shipped.
The IR files are committed in the repository.
So, remember to build the IR files and commit them after modifying the C files.
## Build Requirement
- Make
- Clang
- Python

4
llrtc/lib/.gitignore vendored Normal file
View file

@ -0,0 +1,4 @@
*.o
*.run
*.out
*.ll

66
llrtc/lib/Makefile Normal file
View file

@ -0,0 +1,66 @@
# Builds LLRT in two forms:
#   * portable LLVM IR (.ll) for x86 and x86_64, emitted into $(OUTDIR)
#     -- target `ir` (the shipped artifact)
#   * native objects/binaries used only by the self tests -- target `test`

OUTPUT := llrt
SOURCES := udivmod64.c sdivmod64.c div64.c mod64.c
TESTS := test_udivmod64.c test_sdivmod64.c

CLANG := clang
LLVM_LINK := llvm-link
CF := -Wall -ansi
# -ftrapv: make signed overflow trap while running the native tests.
CF_TEST := $(CF) -ftrapv
CF_BUILD := $(CF) -O0 -emit-llvm
OUTDIR := ..
# Removes the hard-coded `target triple` line so the IR stays portable.
STRIPPER := ../tools/striptriple.py

.PHONY: all ir build-test test clean-test clean-dist clean-temp clean

all: ir

ir: $(OUTDIR)/$(OUTPUT)_x86.ll $(OUTDIR)/$(OUTPUT)_x86_64.ll

$(OUTDIR)/$(OUTPUT)_x86.ll: $(SOURCES:.c=_x86.bc)
	$(LLVM_LINK) -S $^ -o $@
	python $(STRIPPER) $@

$(OUTDIR)/$(OUTPUT)_x86_64.ll: $(SOURCES:.c=_x86_64.bc)
	$(LLVM_LINK) -S $^ -o $@
	python $(STRIPPER) $@

build-test: $(SOURCES:.c=.o) $(TESTS:.c=.run)

lib$(OUTPUT).a: $(SOURCES:.c=.o)
	$(CLANG) -static $^ -o $@

# Each test_foo.run binary has a matching test_foo.py driver that feeds
# it inputs and checks results against Python's own integer arithmetic.
test: $(TESTS:.c=.run)
	for src in $^; do \
	    echo "testing $${src}"; \
	    python $${src%.*}.py > $${src%.*}.out; \
	done

clean-test:
	$(RM) *.out *.o *.run

clean-dist: clean-temp
	$(RM) *.ll

clean-temp:
	$(RM) *.bc *.o *.out

clean: clean-test clean-dist

# Every translation unit depends on the shared header.
%.c: llrt.h

%_x86.bc: %.c
	$(CLANG) -m32 $(CF_BUILD) -c $< -o $@

%_x86_64.bc: %.c
	$(CLANG) -m64 $(CF_BUILD) -c $< -o $@

%.o: %.c
	$(CLANG) $(CF_TEST) -c $<

%.run: %.c
	$(CLANG) $(CF_TEST) -o $@ $^

# Extra objects the test binaries link against.
test_udivmod64.run: udivmod64.o
test_sdivmod64.run: udivmod64.o sdivmod64.o

11
llrtc/lib/div64.c Normal file
View file

@ -0,0 +1,11 @@
#include "llrt.h"

/* 64-bit unsigned division: quotient only, remainder discarded. */
uint64_t udiv64(uint64_t dividend, uint64_t divisor)
{
    return udivmod64(dividend, divisor, NULL);
}

/* 64-bit signed division: quotient only, remainder discarded. */
int64_t sdiv64(int64_t dividend, int64_t divisor)
{
    return sdivmod64(dividend, divisor, NULL);
}

19
llrtc/lib/llrt.h Normal file
View file

@ -0,0 +1,19 @@
/*
 * LLRT: a small portable subset of compiler-rt, providing 64-bit
 * integer division/modulo helpers for targets missing them.
 */
#ifndef LLRT_H_
#define LLRT_H_

#include <stdint.h>

/* NOTE(review): stdint.h does not define NULL; defined here so the .c
   files need no extra include.  0 is a valid null pointer constant. */
#define NULL 0
#define BITS_PER_BYTE 8

/* Core routine: unsigned divmod.  If `remainder` is non-NULL the
   remainder is stored through it.  Division by zero deliberately
   executes an integer divide-by-zero (see udivmod64.c). */
uint64_t udivmod64(uint64_t dividend, uint64_t divisor, uint64_t *remainder);
/* Signed divmod built on udivmod64; the remainder takes the divisor's
   sign (Python-style divmod -- see sdivmod64.c). */
int64_t sdivmod64(int64_t dividend, int64_t divisor, int64_t *remainder);

/* Convenience wrappers returning only the quotient... */
uint64_t udiv64(uint64_t dividend, uint64_t divisor);
int64_t sdiv64(int64_t dividend, int64_t divisor);
/* ...or only the remainder. */
uint64_t umod64(uint64_t dividend, uint64_t divisor);
int64_t smod64(int64_t dividend, int64_t divisor);

#endif /* LLRT_H_ */

15
llrtc/lib/mod64.c Normal file
View file

@ -0,0 +1,15 @@
#include "llrt.h"

/* 64-bit unsigned modulo: remainder only, quotient discarded. */
uint64_t umod64(uint64_t dividend, uint64_t divisor)
{
    uint64_t rem;
    udivmod64(dividend, divisor, &rem);
    return rem;
}

/* 64-bit signed modulo: remainder only (sign follows the divisor). */
int64_t smod64(int64_t dividend, int64_t divisor)
{
    int64_t rem;
    sdivmod64(dividend, divisor, &rem);
    return rem;
}

40
llrtc/lib/sdivmod64.c Normal file
View file

@ -0,0 +1,40 @@
#include "llrt.h"
#include <stdio.h>

/*
 * Signed 64-bit divmod built on udivmod64.
 *
 * Semantics (matched by the Python test driver against // and %):
 * when the operand signs differ the quotient is rounded toward
 * negative infinity (floor division) and the remainder takes the
 * sign of the divisor.
 *
 * Changes from the original:
 *   - magnitudes are computed with unsigned negation (0 - (uint64_t)x),
 *     avoiding signed-overflow UB when an operand is INT64_MIN;
 *   - removed the unused local `signbitidx`.
 */
int64_t sdivmod64(int64_t dividend, int64_t divisor, int64_t *remainder)
{
    int signed_dividend = dividend < 0;
    int signed_divisor = divisor < 0;
    int signed_result = signed_divisor ^ signed_dividend;
    int64_t quotient;
    uint64_t udvd, udvr, uquotient, uremainder;

    /* Magnitudes: negate in unsigned arithmetic, which is well defined
       even for INT64_MIN (signed negation would overflow). */
    udvd = signed_dividend ? 0 - (uint64_t)dividend : (uint64_t)dividend;
    udvr = signed_divisor ? 0 - (uint64_t)divisor : (uint64_t)divisor;
    uquotient = udivmod64(udvd, udvr, &uremainder);

    if (signed_result) {
        /* Opposite signs: floor the quotient (round toward -infinity). */
        if (uremainder) {
            quotient = -(int64_t)uquotient - 1;
        } else {
            quotient = -(int64_t)uquotient;
        }
        if (remainder) {
            /* Recover the remainder from
               dividend == quotient * divisor + remainder, computed in
               unsigned arithmetic where wraparound is defined. */
            *remainder = (uint64_t)dividend - (uint64_t)quotient * (uint64_t)divisor;
        }
    } else {
        quotient = (int64_t)uquotient;
        if (remainder) {
            /* Same signs: remainder simply carries the divisor's sign. */
            *remainder = signed_divisor ? -uremainder : uremainder;
        }
    }
    return quotient;
}

View file

@ -0,0 +1,21 @@
#include <stdio.h>
#include <stdint.h>
#include "llrt.h"

/*
 * Command-line harness for sdivmod64: reads a signed dividend and
 * divisor from argv, prints the quotient then the remainder, one per
 * line.  Driven by test_sdivmod64.py, which checks the output against
 * Python's // and %.
 */
int main(int argc, char * argv[]){
    int64_t n, d, q, r;
    if (argc != 3) {
        printf("invalid argument: %s dividend divisor", argv[0]);
        return 1;
    }
    /* NOTE(review): sscanf results are unchecked; the Python driver
       always supplies well-formed integers. */
    sscanf(argv[1], "%lld", &n);
    sscanf(argv[2], "%lld", &d);
    q = sdivmod64(n, d, &r);
    printf("%lld\n", q);
    printf("%lld\n", r);
    return 0;
}

View file

@ -0,0 +1,56 @@
# Driver for test_sdivmod64.run: feeds signed dividend/divisor pairs to
# the C binary and checks quotient and remainder against Python's floor
# division (//) and modulo (%).  Python 2 script.
import math
import os
import subprocess

# Path to the compiled test binary (built by the lib Makefile).
udt = os.path.join('.', 'test_sdivmod64.run')


def testcase(dividend, divisor):
    # Run the binary once; it prints quotient then remainder, one per line.
    print 'divmod64(%d, %d)' % (dividend, divisor)
    procargs = ('%s %s %s' % (udt, dividend, divisor)).split()
    result = subprocess.check_output(procargs)
    gotQ, gotR = map(int, result.splitlines())
    # Expected values use Python semantics: floored quotient, remainder
    # with the divisor's sign -- the contract sdivmod64 implements.
    expectQ = dividend // divisor
    expectR = dividend % divisor
    print 'Q = %d, R = %d' % (gotQ, gotR)
    if expectQ != gotQ:
        raise ValueError("invalid quotient: got=%d but expect=%d" %
                         (gotQ, expectQ))
    if expectR != gotR:
        raise ValueError("invalid remainder: got=%d but expect=%d" %
                         (gotR, expectR))
    print 'OK'


def testsequence():
    # Magnitude pairs; each is exercised in all four sign combinations
    # below.  Values probe 32-bit and 63-bit word boundaries.
    subjects = [
        (0, 1),
        (0, 0xffffffff),
        (1, 2),
        (1, 983219),
        (2, 2),
        (3, 2),
        (1024, 2),
        (2048, 512),
        (21321, 512),
        (9329189, 1031),
        (0xffffffff, 2),
        (0xffffffff, 0xffff),
        (0x1ffffffff, 2),
        (0x1ffffffff, 0xffff),
        (0xffff, 0xffffffff),
        (0x0fffffffffffffff, 0xffff),
        (0x7fffffffffffffff, 0x7fffffffffffffff),
        (0x7fffffffffffffff, 0x7ffffffffffffff0),
        (0x7fffffffffffffff, 87655678587161901),
    ]
    for dvd, dvr in subjects:
        testcase(dvd, dvr)
        testcase(dvd, -dvr)
        testcase(-dvd, dvr)
        testcase(-dvd, -dvr)


if __name__ == '__main__':
    testsequence()

View file

@ -0,0 +1,20 @@
#include <stdio.h>
#include <stdint.h>
#include "llrt.h"

/*
 * Command-line harness for udivmod64: reads an unsigned dividend and
 * divisor from argv, prints the quotient then the remainder, one per
 * line.  Driven by test_udivmod64.py, which checks the output against
 * Python's // and %.
 */
int main(int argc, char * argv[]){
    uint64_t n, d, q, r;
    if (argc != 3) {
        printf("invalid argument: %s dividend divisor", argv[0]);
        return 1;
    }
    /* NOTE(review): sscanf results are unchecked; the Python driver
       always supplies well-formed integers. */
    sscanf(argv[1], "%llu", &n);
    sscanf(argv[2], "%llu", &d);
    q = udivmod64(n, d, &r);
    printf("%llu\n", q);
    printf("%llu\n", r);
    return 0;
}

View file

@ -0,0 +1,53 @@
# Driver for test_udivmod64.run: feeds unsigned dividend/divisor pairs
# to the C binary and checks quotient and remainder against Python's
# // and %.  Python 2 script.
import math
import os
import subprocess

# Path to the compiled test binary (built by the lib Makefile).
udt = os.path.join('.', 'test_udivmod64.run')


def testcase(dividend, divisor):
    # Run the binary once; it prints quotient then remainder, one per line.
    print 'divmod64(%d, %d)' % (dividend, divisor)
    procargs = ('%s %s %s' % (udt, dividend, divisor)).split()
    result = subprocess.check_output(procargs)
    gotQ, gotR = map(int, result.splitlines())
    expectQ = dividend // divisor
    expectR = dividend % divisor
    print 'Q = %d, R = %d' % (gotQ, gotR)
    if expectQ != gotQ:
        raise ValueError("invalid quotient: got=%d but expect=%d" %
                         (gotQ, expectQ))
    if expectR != gotR:
        raise ValueError("invalid remainder: got=%d but expect=%d" %
                         (gotR, expectR))
    print 'OK'


def testsequence():
    # Unsigned-only cases, probing 32-bit and full 64-bit boundaries
    # (including the all-ones dividend).
    subjects = [
        (0, 1),
        (0, 0xffffffffffffffff),
        (1, 2),
        (1, 983219),
        (2, 2),
        (3, 2),
        (1024, 2),
        (2048, 512),
        (21321, 512),
        (9329189, 1031),
        (0xffffffff, 2),
        (0xffffffff, 0xffff),
        (0x1ffffffff, 2),
        (0x1ffffffff, 0xffff),
        (0xffff, 0xffffffff),
        (0xffffffffffffffff, 0xffff),
        (0xffffffffffffffff, 0x7fffffffffffffff),
        (0xffffffffffffffff, 0xfffffffffffffff0),
        (0xffffffffffffffff, 87655678587161901),
    ]
    for dvd, dvr in subjects:
        testcase(dvd, dvr)


if __name__ == '__main__':
    testsequence()

84
llrtc/lib/udivmod64.c Normal file
View file

@ -0,0 +1,84 @@
/*
 * Unsigned 64-bit divmod for platforms missing native 64-bit division
 * and/or modulo support.
 */
#include "llrt.h"

/*
 * Count leading zero bits of a 64-bit word; returns 64 for x == 0.
 * Portable bit-at-a-time scan from the most significant bit down.
 */
static
int clz64(uint64_t x)
{
    const int total_bits = sizeof(x) * BITS_PER_BYTE;
    int zc = 0;
    /* Stop at the first set bit (or after all 64 bits are zero). */
    while (zc < total_bits && ((x >> (total_bits - zc - 1)) & 1) == 0) {
        ++zc;
    }
    return zc;
}
/* Running state of the shift-restore division: `tmp` accumulates the
   partial remainder, `dvd` holds the not-yet-consumed dividend bits. */
typedef struct div_state_
{
    uint64_t tmp, dvd;
} div_state;

/*
 * Shift the combined 128-bit value (tmp:dvd) left by one bit; the
 * dividend's most significant bit moves into the partial remainder.
 */
static
void div_state_lshift(div_state *state)
{
    state->tmp = (state->tmp << 1) | (state->dvd >> 63);
    state->dvd = state->dvd << 1;
}
/*
 * Division of unsigned 64-bit words using only 64-bit shift, compare
 * and subtraction, following the shift-restore division algorithm.
 * For those interested in a 32-bit implementation, mapping the 64-bit
 * operations to 32-bit should be trivial.
 *
 * References:
 *   - IBM, The PowerPC Compiler Writer's Guide
 *   - LLVM compiler-rt
 *
 * Assumptions:
 *   - all operands and results are unsigned (non-negative)
 *   - unsigned arithmetic wraps around
 *
 * If `remainder` is non-NULL the remainder is stored through it.
 * Division by zero deliberately evaluates 1/0 so the platform's own
 * divide-by-zero behavior is triggered.
 */
uint64_t udivmod64(uint64_t dividend, uint64_t divisor, uint64_t *remainder)
{
    div_state state = {0, dividend};
    uint64_t quotient = 0;
    int i;
    int skipahead;
    if (divisor == 0) {
        return 1 / 0; /* intentionally div by zero */
    }
    /*
      Shift past the dividend's leading zeros to reduce iterations;
      these positions cannot contribute quotient bits.
    */
    skipahead = clz64(dividend);
    for (i = 0; i < skipahead; ++i) {
        div_state_lshift(&state);
    }
    /*
      Main division loop: one quotient bit per remaining dividend bit.
    */
    for (i = skipahead; i < 64; ++i) {
        div_state_lshift(&state);
        if (state.tmp >= divisor) {
            /* Restore step: subtract the divisor, record a 1 bit. */
            state.tmp = state.tmp - divisor;
            quotient |= 1ull << (63 - i);
        }
    }
    if (remainder) *remainder = state.tmp;
    return quotient;
}

View file

@ -0,0 +1,15 @@
"""Strip the `target triple = ...` line from an LLVM IR file, in place.

clang hard-codes the build host's triple into the emitted IR; removing
it keeps the shipped .ll files portable across platforms.

Usage: python striptriple.py FILE.ll
"""
import sys
import re

# Raw string: `\s` belongs to the regex, not to Python string escaping
# (non-raw form raises invalid-escape warnings on modern Python).
tripleline = re.compile(r'^target\s+triple\s+=\s+')

# Read everything first: we rewrite the same path afterwards.
buf = []
with open(sys.argv[1], 'r') as fin:
    for line in fin:
        if not tripleline.match(line):
            buf.append(line)

with open(sys.argv[1], 'w') as fout:
    fout.writelines(buf)

View file

@ -1848,6 +1848,9 @@ class Instruction(User):
    def erase_from_parent(self):
        # Remove this instruction from its parent basic block
        # (delegates to the wrapped C++ eraseFromParent).
        return self._ptr.eraseFromParent()

    def replace_all_uses_with(self, inst):
        # Redirect every use of this instruction's value to `inst`
        # (delegates to the wrapped replaceAllUsesWith).
        self._ptr.replaceAllUsesWith(inst)
class CallOrInvokeInstruction(Instruction):
_type_ = api.llvm.CallInst, api.llvm.InvokeInst

77
llvm/llrt.py Normal file
View file

@ -0,0 +1,77 @@
import os
import llvm.core as lc
import llvm.passes as lp
import llvm.ee as le
def replace_divmod64(lfunc):
    '''Replaces all 64-bit integer division (sdiv, udiv) and modulo
    (srem, urem) instructions in `lfunc` with calls to the LLRT runtime
    routines (__llrt_udiv64 etc.), declaring them in the module on
    demand.

    Removed the unused local `int64ptr` from the original.
    '''
    int64 = lc.Type.int(64)
    # All four runtime routines share the signature i64(i64, i64).
    functy = lc.Type.function(int64, [int64, int64])
    udiv64 = lfunc.module.get_or_insert_function(functy, '__llrt_udiv64')
    sdiv64 = lfunc.module.get_or_insert_function(functy, '__llrt_sdiv64')
    umod64 = lfunc.module.get_or_insert_function(functy, '__llrt_umod64')
    smod64 = lfunc.module.get_or_insert_function(functy, '__llrt_smod64')
    builder = lc.Builder.new(lfunc.entry_basic_block)
    for bb in lfunc.basic_blocks:
        for inst in bb.instructions:
            # Only 64-bit integer div/rem instructions are rewritten.
            if inst.opcode_name == 'sdiv' and inst.type == int64:
                _replace_with(builder, inst, sdiv64)
            elif inst.opcode_name == 'udiv' and inst.type == int64:
                _replace_with(builder, inst, udiv64)
            elif inst.opcode_name == 'srem' and inst.type == int64:
                _replace_with(builder, inst, smod64)
            elif inst.opcode_name == 'urem' and inst.type == int64:
                _replace_with(builder, inst, umod64)
def _replace_with(builder, inst, func):
    '''Replace instruction with a call to the function with the same
    operands as arguments.
    '''
    # Emit the call immediately before the instruction it replaces so
    # the operand values are available at the call site.
    builder.position_before(inst)
    replacement = builder.call(func, inst.operands)
    # Rewire all uses to the call, then delete the dead instruction.
    inst.replace_all_uses_with(replacement._ptr)
    inst.erase_from_parent()
def load(arch):
    '''Load the LLRT module corresponding to the given architecture.

    Creates a new module and optimizes it using the information from
    the host machine.
    '''
    # The shipped IR lives next to this module: llrt/llrt_<arch>.ll
    path = os.path.join(os.path.dirname(__file__), 'llrt', 'llrt_%s.ll' % arch)
    with open(path) as fin:
        lib = lc.Module.from_assembly(fin)
    # Run opt-level-3 module passes so the generic, unoptimized IR is
    # specialized for the host target machine.
    tm = le.TargetMachine.new()
    pms = lp.build_pass_managers(tm, opt=3, fpm=False)
    pms.pm.run(lib)
    return lib
class LLRT(object):
    '''Owns the LLRT module and a JIT engine for it, and publishes the
    runtime routines into LLVM's global symbol map under their mangled
    (__llrt_-prefixed) names.
    '''

    def __init__(self):
        # Arch prefix of the default triple ("x86", "x86_64", ...)
        # selects which shipped llrt_<arch>.ll to load.
        arch = le.get_default_triple().split('-', 1)[0]
        self.module = load(arch)
        self.engine = le.EngineBuilder.new(self.module).opt(3).create()
        # Mangled names registered by install_symbols().
        self.installed_symbols = set()

    def install_symbols(self):
        '''Bind all the external symbols to the global symbol map.
        Any future reference to these symbols will be automatically
        resolved by LLVM.
        '''
        for lfunc in self.module.functions:
            if lfunc.linkage == lc.LINKAGE_EXTERNAL:
                mangled = '__llrt_' + lfunc.name
                self.installed_symbols.add(mangled)
                # JIT-compile the routine and expose its address.
                ptr = self.engine.get_pointer_to_function(lfunc)
                le.dylib_add_symbol(mangled, ptr)

    def uninstall_symbols(self):
        # Re-register each installed symbol with address 0.
        for sym in self.installed_symbols:
            le.dylib_add_symbol(sym, 0)

371
llvm/llrt/llrt_x86.ll Normal file
View file

@ -0,0 +1,371 @@
; ModuleID = 'udivmod64_x86.bc'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
%struct.div_state_ = type { i64, i64 }
define i64 @udivmod64(i64 %dividend, i64 %divisor, i64* %remainder) nounwind ssp {
%1 = alloca i64, align 4
%2 = alloca i64, align 8
%3 = alloca i64, align 8
%4 = alloca i64*, align 4
%state = alloca %struct.div_state_, align 4
%quotient = alloca i64, align 8
%i = alloca i32, align 4
%skipahead = alloca i32, align 4
store i64 %dividend, i64* %2, align 8
store i64 %divisor, i64* %3, align 8
store i64* %remainder, i64** %4, align 4
%5 = getelementptr inbounds %struct.div_state_* %state, i32 0, i32 0
store i64 0, i64* %5, align 4
%6 = getelementptr inbounds %struct.div_state_* %state, i32 0, i32 1
%7 = load i64* %2, align 8
store i64 %7, i64* %6, align 4
store i64 0, i64* %quotient, align 8
%8 = load i64* %3, align 8
%9 = icmp eq i64 %8, 0
br i1 %9, label %10, label %11
; <label>:10 ; preds = %0
store i64 0, i64* %1
br label %57
; <label>:11 ; preds = %0
%12 = load i64* %2, align 8
%13 = call i32 @clz64(i64 %12)
store i32 %13, i32* %skipahead, align 4
store i32 0, i32* %i, align 4
br label %14
; <label>:14 ; preds = %19, %11
%15 = load i32* %i, align 4
%16 = load i32* %skipahead, align 4
%17 = icmp slt i32 %15, %16
br i1 %17, label %18, label %22
; <label>:18 ; preds = %14
call void @div_state_lshift(%struct.div_state_* %state)
br label %19
; <label>:19 ; preds = %18
%20 = load i32* %i, align 4
%21 = add nsw i32 %20, 1
store i32 %21, i32* %i, align 4
br label %14
; <label>:22 ; preds = %14
%23 = load i32* %skipahead, align 4
store i32 %23, i32* %i, align 4
br label %24
; <label>:24 ; preds = %45, %22
%25 = load i32* %i, align 4
%26 = icmp slt i32 %25, 64
br i1 %26, label %27, label %48
; <label>:27 ; preds = %24
call void @div_state_lshift(%struct.div_state_* %state)
%28 = getelementptr inbounds %struct.div_state_* %state, i32 0, i32 0
%29 = load i64* %28, align 4
%30 = load i64* %3, align 8
%31 = icmp uge i64 %29, %30
br i1 %31, label %32, label %44
; <label>:32 ; preds = %27
%33 = getelementptr inbounds %struct.div_state_* %state, i32 0, i32 0
%34 = load i64* %33, align 4
%35 = load i64* %3, align 8
%36 = sub i64 %34, %35
%37 = getelementptr inbounds %struct.div_state_* %state, i32 0, i32 0
store i64 %36, i64* %37, align 4
%38 = load i32* %i, align 4
%39 = sub nsw i32 63, %38
%40 = zext i32 %39 to i64
%41 = shl i64 1, %40
%42 = load i64* %quotient, align 8
%43 = or i64 %42, %41
store i64 %43, i64* %quotient, align 8
br label %44
; <label>:44 ; preds = %32, %27
br label %45
; <label>:45 ; preds = %44
%46 = load i32* %i, align 4
%47 = add nsw i32 %46, 1
store i32 %47, i32* %i, align 4
br label %24
; <label>:48 ; preds = %24
%49 = load i64** %4, align 4
%50 = icmp ne i64* %49, null
br i1 %50, label %51, label %55
; <label>:51 ; preds = %48
%52 = getelementptr inbounds %struct.div_state_* %state, i32 0, i32 0
%53 = load i64* %52, align 4
%54 = load i64** %4, align 4
store i64 %53, i64* %54, align 4
br label %55
; <label>:55 ; preds = %51, %48
%56 = load i64* %quotient, align 8
store i64 %56, i64* %1
br label %57
; <label>:57 ; preds = %55, %10
%58 = load i64* %1
ret i64 %58
}
define internal i32 @clz64(i64 %x) nounwind ssp {
%1 = alloca i64, align 8
%total_bits = alloca i32, align 4
%zc = alloca i32, align 4
store i64 %x, i64* %1, align 8
store i32 64, i32* %total_bits, align 4
store i32 0, i32* %zc, align 4
br label %2
; <label>:2 ; preds = %16, %0
%3 = load i32* %zc, align 4
%4 = icmp slt i32 %3, 64
br i1 %4, label %5, label %14
; <label>:5 ; preds = %2
%6 = load i64* %1, align 8
%7 = load i32* %zc, align 4
%8 = sub nsw i32 64, %7
%9 = sub nsw i32 %8, 1
%10 = zext i32 %9 to i64
%11 = lshr i64 %6, %10
%12 = and i64 %11, 1
%13 = icmp eq i64 %12, 0
br label %14
; <label>:14 ; preds = %5, %2
%15 = phi i1 [ false, %2 ], [ %13, %5 ]
br i1 %15, label %16, label %19
; <label>:16 ; preds = %14
%17 = load i32* %zc, align 4
%18 = add nsw i32 %17, 1
store i32 %18, i32* %zc, align 4
br label %2
; <label>:19 ; preds = %14
%20 = load i32* %zc, align 4
ret i32 %20
}
define internal void @div_state_lshift(%struct.div_state_* %state) nounwind ssp {
%1 = alloca %struct.div_state_*, align 4
store %struct.div_state_* %state, %struct.div_state_** %1, align 4
%2 = load %struct.div_state_** %1, align 4
%3 = getelementptr inbounds %struct.div_state_* %2, i32 0, i32 0
%4 = load i64* %3, align 4
%5 = shl i64 %4, 1
%6 = load %struct.div_state_** %1, align 4
%7 = getelementptr inbounds %struct.div_state_* %6, i32 0, i32 1
%8 = load i64* %7, align 4
%9 = lshr i64 %8, 63
%10 = or i64 %5, %9
%11 = load %struct.div_state_** %1, align 4
%12 = getelementptr inbounds %struct.div_state_* %11, i32 0, i32 0
store i64 %10, i64* %12, align 4
%13 = load %struct.div_state_** %1, align 4
%14 = getelementptr inbounds %struct.div_state_* %13, i32 0, i32 1
%15 = load i64* %14, align 4
%16 = shl i64 %15, 1
%17 = load %struct.div_state_** %1, align 4
%18 = getelementptr inbounds %struct.div_state_* %17, i32 0, i32 1
store i64 %16, i64* %18, align 4
ret void
}
define i64 @sdivmod64(i64 %dividend, i64 %divisor, i64* %remainder) nounwind ssp {
%1 = alloca i64, align 8
%2 = alloca i64, align 8
%3 = alloca i64*, align 4
%signbitidx = alloca i32, align 4
%signed_dividend = alloca i32, align 4
%signed_divisor = alloca i32, align 4
%signed_result = alloca i32, align 4
%quotient = alloca i64, align 8
%udvd = alloca i64, align 8
%udvr = alloca i64, align 8
%uquotient = alloca i64, align 8
%uremainder = alloca i64, align 8
store i64 %dividend, i64* %1, align 8
store i64 %divisor, i64* %2, align 8
store i64* %remainder, i64** %3, align 4
store i32 63, i32* %signbitidx, align 4
%4 = load i64* %1, align 8
%5 = icmp slt i64 %4, 0
%6 = zext i1 %5 to i32
store i32 %6, i32* %signed_dividend, align 4
%7 = load i64* %2, align 8
%8 = icmp slt i64 %7, 0
%9 = zext i1 %8 to i32
store i32 %9, i32* %signed_divisor, align 4
%10 = load i32* %signed_divisor, align 4
%11 = load i32* %signed_dividend, align 4
%12 = xor i32 %10, %11
store i32 %12, i32* %signed_result, align 4
%13 = load i32* %signed_dividend, align 4
%14 = icmp ne i32 %13, 0
br i1 %14, label %15, label %18
; <label>:15 ; preds = %0
%16 = load i64* %1, align 8
%17 = sub nsw i64 0, %16
br label %20
; <label>:18 ; preds = %0
%19 = load i64* %1, align 8
br label %20
; <label>:20 ; preds = %18, %15
%21 = phi i64 [ %17, %15 ], [ %19, %18 ]
store i64 %21, i64* %udvd, align 8
%22 = load i32* %signed_divisor, align 4
%23 = icmp ne i32 %22, 0
br i1 %23, label %24, label %27
; <label>:24 ; preds = %20
%25 = load i64* %2, align 8
%26 = sub nsw i64 0, %25
br label %29
; <label>:27 ; preds = %20
%28 = load i64* %2, align 8
br label %29
; <label>:29 ; preds = %27, %24
%30 = phi i64 [ %26, %24 ], [ %28, %27 ]
store i64 %30, i64* %udvr, align 8
%31 = load i64* %udvd, align 8
%32 = load i64* %udvr, align 8
%33 = call i64 @udivmod64(i64 %31, i64 %32, i64* %uremainder)
store i64 %33, i64* %uquotient, align 8
%34 = load i32* %signed_result, align 4
%35 = icmp ne i32 %34, 0
br i1 %35, label %36, label %57
; <label>:36 ; preds = %29
%37 = load i64* %uremainder, align 8
%38 = icmp ne i64 %37, 0
br i1 %38, label %39, label %43
; <label>:39 ; preds = %36
%40 = load i64* %uquotient, align 8
%41 = sub nsw i64 0, %40
%42 = sub nsw i64 %41, 1
store i64 %42, i64* %quotient, align 8
br label %46
; <label>:43 ; preds = %36
%44 = load i64* %uquotient, align 8
%45 = sub nsw i64 0, %44
store i64 %45, i64* %quotient, align 8
br label %46
; <label>:46 ; preds = %43, %39
%47 = load i64** %3, align 4
%48 = icmp ne i64* %47, null
br i1 %48, label %49, label %56
; <label>:49 ; preds = %46
%50 = load i64* %1, align 8
%51 = load i64* %quotient, align 8
%52 = load i64* %2, align 8
%53 = mul i64 %51, %52
%54 = sub i64 %50, %53
%55 = load i64** %3, align 4
store i64 %54, i64* %55, align 4
br label %56
; <label>:56 ; preds = %49, %46
br label %73
; <label>:57 ; preds = %29
%58 = load i64* %uquotient, align 8
store i64 %58, i64* %quotient, align 8
%59 = load i64** %3, align 4
%60 = icmp ne i64* %59, null
br i1 %60, label %61, label %72
; <label>:61 ; preds = %57
%62 = load i32* %signed_divisor, align 4
%63 = icmp ne i32 %62, 0
br i1 %63, label %64, label %67
; <label>:64 ; preds = %61
%65 = load i64* %uremainder, align 8
%66 = sub i64 0, %65
br label %69
; <label>:67 ; preds = %61
%68 = load i64* %uremainder, align 8
br label %69
; <label>:69 ; preds = %67, %64
%70 = phi i64 [ %66, %64 ], [ %68, %67 ]
%71 = load i64** %3, align 4
store i64 %70, i64* %71, align 4
br label %72
; <label>:72 ; preds = %69, %57
br label %73
; <label>:73 ; preds = %72, %56
%74 = load i64* %quotient, align 8
ret i64 %74
}
define i64 @udiv64(i64 %dividend, i64 %divisor) nounwind ssp {
%1 = alloca i64, align 8
%2 = alloca i64, align 8
store i64 %dividend, i64* %1, align 8
store i64 %divisor, i64* %2, align 8
%3 = load i64* %1, align 8
%4 = load i64* %2, align 8
%5 = call i64 @udivmod64(i64 %3, i64 %4, i64* null)
ret i64 %5
}
define i64 @sdiv64(i64 %dividend, i64 %divisor) nounwind ssp {
%1 = alloca i64, align 8
%2 = alloca i64, align 8
store i64 %dividend, i64* %1, align 8
store i64 %divisor, i64* %2, align 8
%3 = load i64* %1, align 8
%4 = load i64* %2, align 8
%5 = call i64 @sdivmod64(i64 %3, i64 %4, i64* null)
ret i64 %5
}
define i64 @umod64(i64 %dividend, i64 %divisor) nounwind ssp {
%1 = alloca i64, align 8
%2 = alloca i64, align 8
%rem = alloca i64, align 8
store i64 %dividend, i64* %1, align 8
store i64 %divisor, i64* %2, align 8
%3 = load i64* %1, align 8
%4 = load i64* %2, align 8
%5 = call i64 @udivmod64(i64 %3, i64 %4, i64* %rem)
%6 = load i64* %rem, align 8
ret i64 %6
}
define i64 @smod64(i64 %dividend, i64 %divisor) nounwind ssp {
%1 = alloca i64, align 8
%2 = alloca i64, align 8
%rem = alloca i64, align 8
store i64 %dividend, i64* %1, align 8
store i64 %divisor, i64* %2, align 8
%3 = load i64* %1, align 8
%4 = load i64* %2, align 8
%5 = call i64 @sdivmod64(i64 %3, i64 %4, i64* %rem)
%6 = load i64* %rem, align 8
ret i64 %6
}

371
llvm/llrt/llrt_x86_64.ll Normal file
View file

@ -0,0 +1,371 @@
; ModuleID = 'udivmod64_x86_64.bc'
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
%struct.div_state_ = type { i64, i64 }
define i64 @udivmod64(i64 %dividend, i64 %divisor, i64* %remainder) nounwind uwtable ssp {
%1 = alloca i64, align 8
%2 = alloca i64, align 8
%3 = alloca i64, align 8
%4 = alloca i64*, align 8
%state = alloca %struct.div_state_, align 8
%quotient = alloca i64, align 8
%i = alloca i32, align 4
%skipahead = alloca i32, align 4
store i64 %dividend, i64* %2, align 8
store i64 %divisor, i64* %3, align 8
store i64* %remainder, i64** %4, align 8
%5 = getelementptr inbounds %struct.div_state_* %state, i32 0, i32 0
store i64 0, i64* %5, align 8
%6 = getelementptr inbounds %struct.div_state_* %state, i32 0, i32 1
%7 = load i64* %2, align 8
store i64 %7, i64* %6, align 8
store i64 0, i64* %quotient, align 8
%8 = load i64* %3, align 8
%9 = icmp eq i64 %8, 0
br i1 %9, label %10, label %11
; <label>:10 ; preds = %0
store i64 0, i64* %1
br label %57
; <label>:11 ; preds = %0
%12 = load i64* %2, align 8
%13 = call i32 @clz64(i64 %12)
store i32 %13, i32* %skipahead, align 4
store i32 0, i32* %i, align 4
br label %14
; <label>:14 ; preds = %19, %11
%15 = load i32* %i, align 4
%16 = load i32* %skipahead, align 4
%17 = icmp slt i32 %15, %16
br i1 %17, label %18, label %22
; <label>:18 ; preds = %14
call void @div_state_lshift(%struct.div_state_* %state)
br label %19
; <label>:19 ; preds = %18
%20 = load i32* %i, align 4
%21 = add nsw i32 %20, 1
store i32 %21, i32* %i, align 4
br label %14
; <label>:22 ; preds = %14
%23 = load i32* %skipahead, align 4
store i32 %23, i32* %i, align 4
br label %24
; <label>:24 ; preds = %45, %22
%25 = load i32* %i, align 4
%26 = icmp slt i32 %25, 64
br i1 %26, label %27, label %48
; <label>:27 ; preds = %24
call void @div_state_lshift(%struct.div_state_* %state)
%28 = getelementptr inbounds %struct.div_state_* %state, i32 0, i32 0
%29 = load i64* %28, align 8
%30 = load i64* %3, align 8
%31 = icmp uge i64 %29, %30
br i1 %31, label %32, label %44
; <label>:32 ; preds = %27
%33 = getelementptr inbounds %struct.div_state_* %state, i32 0, i32 0
%34 = load i64* %33, align 8
%35 = load i64* %3, align 8
%36 = sub i64 %34, %35
%37 = getelementptr inbounds %struct.div_state_* %state, i32 0, i32 0
store i64 %36, i64* %37, align 8
%38 = load i32* %i, align 4
%39 = sub nsw i32 63, %38
%40 = zext i32 %39 to i64
%41 = shl i64 1, %40
%42 = load i64* %quotient, align 8
%43 = or i64 %42, %41
store i64 %43, i64* %quotient, align 8
br label %44
; <label>:44 ; preds = %32, %27
br label %45
; <label>:45 ; preds = %44
%46 = load i32* %i, align 4
%47 = add nsw i32 %46, 1
store i32 %47, i32* %i, align 4
br label %24
; <label>:48 ; preds = %24
%49 = load i64** %4, align 8
%50 = icmp ne i64* %49, null
br i1 %50, label %51, label %55
; <label>:51 ; preds = %48
%52 = getelementptr inbounds %struct.div_state_* %state, i32 0, i32 0
%53 = load i64* %52, align 8
%54 = load i64** %4, align 8
store i64 %53, i64* %54, align 8
br label %55
; <label>:55 ; preds = %51, %48
%56 = load i64* %quotient, align 8
store i64 %56, i64* %1
br label %57
; <label>:57 ; preds = %55, %10
%58 = load i64* %1
ret i64 %58
}
define internal i32 @clz64(i64 %x) nounwind uwtable ssp {
%1 = alloca i64, align 8
%total_bits = alloca i32, align 4
%zc = alloca i32, align 4
store i64 %x, i64* %1, align 8
store i32 64, i32* %total_bits, align 4
store i32 0, i32* %zc, align 4
br label %2
; <label>:2 ; preds = %16, %0
%3 = load i32* %zc, align 4
%4 = icmp slt i32 %3, 64
br i1 %4, label %5, label %14
; <label>:5 ; preds = %2
%6 = load i64* %1, align 8
%7 = load i32* %zc, align 4
%8 = sub nsw i32 64, %7
%9 = sub nsw i32 %8, 1
%10 = zext i32 %9 to i64
%11 = lshr i64 %6, %10
%12 = and i64 %11, 1
%13 = icmp eq i64 %12, 0
br label %14
; <label>:14 ; preds = %5, %2
%15 = phi i1 [ false, %2 ], [ %13, %5 ]
br i1 %15, label %16, label %19
; <label>:16 ; preds = %14
%17 = load i32* %zc, align 4
%18 = add nsw i32 %17, 1
store i32 %18, i32* %zc, align 4
br label %2
; <label>:19 ; preds = %14
%20 = load i32* %zc, align 4
ret i32 %20
}
define internal void @div_state_lshift(%struct.div_state_* %state) nounwind uwtable ssp {
%1 = alloca %struct.div_state_*, align 8
store %struct.div_state_* %state, %struct.div_state_** %1, align 8
%2 = load %struct.div_state_** %1, align 8
%3 = getelementptr inbounds %struct.div_state_* %2, i32 0, i32 0
%4 = load i64* %3, align 8
%5 = shl i64 %4, 1
%6 = load %struct.div_state_** %1, align 8
%7 = getelementptr inbounds %struct.div_state_* %6, i32 0, i32 1
%8 = load i64* %7, align 8
%9 = lshr i64 %8, 63
%10 = or i64 %5, %9
%11 = load %struct.div_state_** %1, align 8
%12 = getelementptr inbounds %struct.div_state_* %11, i32 0, i32 0
store i64 %10, i64* %12, align 8
%13 = load %struct.div_state_** %1, align 8
%14 = getelementptr inbounds %struct.div_state_* %13, i32 0, i32 1
%15 = load i64* %14, align 8
%16 = shl i64 %15, 1
%17 = load %struct.div_state_** %1, align 8
%18 = getelementptr inbounds %struct.div_state_* %17, i32 0, i32 1
store i64 %16, i64* %18, align 8
ret void
}
define i64 @sdivmod64(i64 %dividend, i64 %divisor, i64* %remainder) nounwind uwtable ssp {
%1 = alloca i64, align 8
%2 = alloca i64, align 8
%3 = alloca i64*, align 8
%signbitidx = alloca i32, align 4
%signed_dividend = alloca i32, align 4
%signed_divisor = alloca i32, align 4
%signed_result = alloca i32, align 4
%quotient = alloca i64, align 8
%udvd = alloca i64, align 8
%udvr = alloca i64, align 8
%uquotient = alloca i64, align 8
%uremainder = alloca i64, align 8
store i64 %dividend, i64* %1, align 8
store i64 %divisor, i64* %2, align 8
store i64* %remainder, i64** %3, align 8
store i32 63, i32* %signbitidx, align 4
%4 = load i64* %1, align 8
%5 = icmp slt i64 %4, 0
%6 = zext i1 %5 to i32
store i32 %6, i32* %signed_dividend, align 4
%7 = load i64* %2, align 8
%8 = icmp slt i64 %7, 0
%9 = zext i1 %8 to i32
store i32 %9, i32* %signed_divisor, align 4
%10 = load i32* %signed_divisor, align 4
%11 = load i32* %signed_dividend, align 4
%12 = xor i32 %10, %11
store i32 %12, i32* %signed_result, align 4
%13 = load i32* %signed_dividend, align 4
%14 = icmp ne i32 %13, 0
br i1 %14, label %15, label %18
; <label>:15 ; preds = %0
%16 = load i64* %1, align 8
%17 = sub nsw i64 0, %16
br label %20
; <label>:18 ; preds = %0
%19 = load i64* %1, align 8
br label %20
; <label>:20 ; preds = %18, %15
%21 = phi i64 [ %17, %15 ], [ %19, %18 ]
store i64 %21, i64* %udvd, align 8
%22 = load i32* %signed_divisor, align 4
%23 = icmp ne i32 %22, 0
br i1 %23, label %24, label %27
; <label>:24 ; preds = %20
%25 = load i64* %2, align 8
%26 = sub nsw i64 0, %25
br label %29
; <label>:27 ; preds = %20
%28 = load i64* %2, align 8
br label %29
; <label>:29 ; preds = %27, %24
%30 = phi i64 [ %26, %24 ], [ %28, %27 ]
store i64 %30, i64* %udvr, align 8
%31 = load i64* %udvd, align 8
%32 = load i64* %udvr, align 8
%33 = call i64 @udivmod64(i64 %31, i64 %32, i64* %uremainder)
store i64 %33, i64* %uquotient, align 8
%34 = load i32* %signed_result, align 4
%35 = icmp ne i32 %34, 0
br i1 %35, label %36, label %57
; <label>:36 ; preds = %29
%37 = load i64* %uremainder, align 8
%38 = icmp ne i64 %37, 0
br i1 %38, label %39, label %43
; <label>:39 ; preds = %36
%40 = load i64* %uquotient, align 8
%41 = sub nsw i64 0, %40
%42 = sub nsw i64 %41, 1
store i64 %42, i64* %quotient, align 8
br label %46
; <label>:43 ; preds = %36
%44 = load i64* %uquotient, align 8
%45 = sub nsw i64 0, %44
store i64 %45, i64* %quotient, align 8
br label %46
; <label>:46 ; preds = %43, %39
%47 = load i64** %3, align 8
%48 = icmp ne i64* %47, null
br i1 %48, label %49, label %56
; <label>:49 ; preds = %46
%50 = load i64* %1, align 8
%51 = load i64* %quotient, align 8
%52 = load i64* %2, align 8
%53 = mul i64 %51, %52
%54 = sub i64 %50, %53
%55 = load i64** %3, align 8
store i64 %54, i64* %55, align 8
br label %56
; <label>:56 ; preds = %49, %46
br label %73
; <label>:57 ; preds = %29
%58 = load i64* %uquotient, align 8
store i64 %58, i64* %quotient, align 8
%59 = load i64** %3, align 8
%60 = icmp ne i64* %59, null
br i1 %60, label %61, label %72
; <label>:61 ; preds = %57
%62 = load i32* %signed_divisor, align 4
%63 = icmp ne i32 %62, 0
br i1 %63, label %64, label %67
; <label>:64 ; preds = %61
%65 = load i64* %uremainder, align 8
%66 = sub i64 0, %65
br label %69
; <label>:67 ; preds = %61
%68 = load i64* %uremainder, align 8
br label %69
; <label>:69 ; preds = %67, %64
%70 = phi i64 [ %66, %64 ], [ %68, %67 ]
%71 = load i64** %3, align 8
store i64 %70, i64* %71, align 8
br label %72
; <label>:72 ; preds = %69, %57
br label %73
; <label>:73 ; preds = %72, %56
%74 = load i64* %quotient, align 8
ret i64 %74
}
define i64 @udiv64(i64 %dividend, i64 %divisor) nounwind uwtable ssp {
%1 = alloca i64, align 8
%2 = alloca i64, align 8
store i64 %dividend, i64* %1, align 8
store i64 %divisor, i64* %2, align 8
%3 = load i64* %1, align 8
%4 = load i64* %2, align 8
%5 = call i64 @udivmod64(i64 %3, i64 %4, i64* null)
ret i64 %5
}
define i64 @sdiv64(i64 %dividend, i64 %divisor) nounwind uwtable ssp {
%1 = alloca i64, align 8
%2 = alloca i64, align 8
store i64 %dividend, i64* %1, align 8
store i64 %divisor, i64* %2, align 8
%3 = load i64* %1, align 8
%4 = load i64* %2, align 8
%5 = call i64 @sdivmod64(i64 %3, i64 %4, i64* null)
ret i64 %5
}
define i64 @umod64(i64 %dividend, i64 %divisor) nounwind uwtable ssp {
%1 = alloca i64, align 8
%2 = alloca i64, align 8
%rem = alloca i64, align 8
store i64 %dividend, i64* %1, align 8
store i64 %divisor, i64* %2, align 8
%3 = load i64* %1, align 8
%4 = load i64* %2, align 8
%5 = call i64 @udivmod64(i64 %3, i64 %4, i64* %rem)
%6 = load i64* %rem, align 8
ret i64 %6
}
define i64 @smod64(i64 %dividend, i64 %divisor) nounwind uwtable ssp {
%1 = alloca i64, align 8
%2 = alloca i64, align 8
%rem = alloca i64, align 8
store i64 %dividend, i64* %1, align 8
store i64 %divisor, i64* %2, align 8
%3 = load i64* %1, align 8
%4 = load i64* %2, align 8
%5 = call i64 @sdivmod64(i64 %3, i64 %4, i64* %rem)
%6 = load i64* %rem, align 8
ret i64 %6
}

View file

@ -1416,6 +1416,32 @@ if llvm.version >= (3, 3):
tests.append(TestMCJIT)
class TestLLRT(TestCase):
    def test_llrt_divmod(self):
        """A 64-bit udiv lowered through LLRT must compute a // b."""
        from llvm import llrt
        # Build a trivial module: foo(a, b) -> a udiv b over i64.
        m = lc.Module.new('testllrt')
        longlong = lc.Type.int(64)
        lfunc = m.add_function(
            lc.Type.function(longlong, [longlong, longlong]), 'foo')
        bldr = lc.Builder.new(lfunc.append_basic_block(''))
        bldr.ret(bldr.udiv(*lfunc.args))
        # Rewrite the udiv into a __llrt_udiv64 call and install the
        # runtime symbols so the JIT can resolve it.
        llrt.replace_divmod64(lfunc)
        rt = llrt.LLRT()
        rt.install_symbols()
        engine = le.EngineBuilder.new(m).create()
        pointer = engine.get_pointer_to_function(lfunc)
        from ctypes import CFUNCTYPE, c_uint64
        func = CFUNCTYPE(c_uint64, c_uint64, c_uint64)(pointer)
        # Was: func(98342, 2231) with a, b assigned but unused -- the
        # assertion now uses the named operands.
        a, b = 98342, 2231
        self.assertEqual(func(a, b), a // b)
        rt.uninstall_symbols()

tests.append(TestLLRT)
class TestArith(TestCase):
'''
Test basic arithmetic support with LLVM MCJIT
@ -1447,10 +1473,12 @@ class TestArith(TestCase):
inttys = [Type.int(32), Type.int(64)]
flttys = [Type.float(), Type.double()]
for ty in inttys:
self.func_template(ty, iop)
for ty in flttys:
self.func_template(ty, fop)
if iop:
for ty in inttys:
self.func_template(ty, iop)
if fop:
for ty in flttys:
self.func_template(ty, fop)
def test_add(self):
self.template('add', 'fadd')
@ -1466,14 +1494,14 @@ class TestArith(TestCase):
print('skipped test for div')
print('known failure due to unresolved external symbol __udivdi3')
return
self.template('udiv', 'fdiv')
self.template('udiv', None) # 'fdiv')
def test_rem(self):
if BITS == 32:
print('skipped test for rem')
print('known failure due to unresolved external symbol __umoddi3')
return
self.template('urem', 'frem')
self.template('urem', None) # 'frem')
if llvm.version >= (3, 3):
# MCJIT is broken in 3.2
@ -1551,6 +1579,8 @@ class TestExact(TestCase):
tests.append(TestExact)
# ---------------------------------------------------------------------------
def run(verbosity=1):