--- ../implementations/cextension/cpartial_asm.c
+++ ../implementations/cextension/cfull_asm.c
@@ -6,17 +6,16 @@
#define MAX_HEIGHT_UINT 34
-static PyObject* c_pascal_triangle_partial_asm(PyObject* self, PyObject* args) {
+static PyObject* c_pascal_triangle_full_asm(PyObject* self, PyObject* args) {
int height, verbose=0, return_list=0;
if (!PyArg_ParseTuple(args, "i|ii", &height, &verbose, &return_list)) return NULL;
+ // TODO MEDIUM: Make a better fix to support zero-based Pascal's Triangle
+ height++;
unsigned int line[height + 1];
-
- int start = height;
- int size;
int index;
- if(height > MAX_HEIGHT_UINT) {
+ if(height > MAX_HEIGHT_UINT + 1) {
PyErr_Format(PyExc_ValueError, "Unable to build Pascal's Triangle higher than %d", MAX_HEIGHT_UINT);
return NULL;
}
@@ -25,32 +24,48 @@
// Fill in line with 1 and put 0 in the end
"movl %0, %%ecx\n\t"
"movq %1, %%rdi\n\t"
- "incl %%ecx\n\t"
"movl $1, %%eax\n\t"
"cld\n\t"
"rep\n\t"
"stosl\n\t"
+ // End of filling
+ "movq $0, (%%rdi)\n\t" // TODO MEDIUM: There is no need for zero-padding
+ "movq $1, %%rcx\n\t" // line number being processed
+ "xorq %%rdx, %%rdx\n\t"
+ "movl %0, %%edx\n\t" // maximum number of lines
+ "movq %1, %%rdi\n\t" // start of "line" array
+ "outer:\n\t"
+ "movq %%rdx, %%rbx\n\t" // offset of current element
+ "subq %%rcx, %%rbx\n\t"
+ "xorq %%r8, %%r8\n\t"
+ // print %%rcx dwords starting from (%%rdi,%%rbx,4)
+ "inner:\n\t"
+ "movl 4(%%rdi,%%rbx,4), %%eax\n\t" // read line[index + 1]
+ "addl %%eax, (%%rdi,%%rbx,4)\n\t" // line[index] += line[index + 1]
+ "addq $1, %%rbx\n\t"
+ "incq %%r8\n\t"
+ "cmpq %%r8, %%rcx\n\t"
+ "jne inner\n\t"
+ "incq %%rcx\n\t"
+ "cmpq %%rdx, %%rcx\n\t"
+ "jl outer\n\t"
+ // print %%rdx dwords starting from (%%rdi)
: /**/
: "g" (height), "g" (line)
- : "eax", "ecx", "rdi"
+ : "eax", "rbx", "rcx", "rdx", "rdi", "r8"
);
- for (size = 0; size <= height; size++) {
- for (index = start + 1; index < height; index++) {
- line[index] += line[index + 1];
- }
-
- for (index = start + size; index >= start; index--) {
+ // Some code that used results to ensure that optimization is not applied and assembly code is actually executing
+ if(verbose) {
+ for (index = height - 1; index >= 0; index--) {
if(verbose) printf("%u ", line[index]);
}
if(verbose) printf("\n");
-
- start--;
}
if(return_list) {
- PyObject* py_line = PyList_New(height + 1);
- for (index = height; index >= 0; index--) {
+ PyObject* py_line = PyList_New(height);
+ for (index = height - 1; index >= 0; index--) {
PyList_SetItem(py_line, index, PyInt_FromLong((long) line[index]));
}
return py_line;
@@ -62,10 +77,10 @@
static PyMethodDef cpascal_triangle_methods[] = {
- {"c_pascal_triangle_partial_asm", c_pascal_triangle_partial_asm, METH_VARARGS},
+ {"c_pascal_triangle_full_asm", c_pascal_triangle_full_asm, METH_VARARGS},
{NULL, NULL}
};
-void initcpartial_asm(void) {
- (void) Py_InitModule("cpartial_asm", cpascal_triangle_methods);
+void initcfull_asm(void) {
+ (void) Py_InitModule("cfull_asm", cpascal_triangle_methods);
}