98 | | #. Add the two head file, kernel/include/core/hypercall.h and libpok/include/core/hypercall.h, and build the corresponding structure and declaration. |
99 | | #. implement the corresponding functions in corresponding .c files. That is: |
100 | | #. kernel/arch/x86/hypercall.c, using this file to build the hypercall_gate. |
101 | | #. kernel/core/hypercall.c, in this file, the pok_core_hypercall will deal with the hypercall. |
102 | | #. modify the kernel/include/arch/x86/interrupt.h, add the support of hypercall handler. |
103 | | #. add libpok/arch/x86/hypercall.c, in this file, we implement the pok_do_hypercall, which will invoke the soft interrupt. |
104 | | #. modified interrelated Makefile to assure those file will work when the BSP is x86-qemu-vmm, also will not influence the normal POK, when the BSP is not x86-qemu-vmm. |
| 98 | # Add the two header files, kernel/include/core/hypercall.h and libpok/include/core/hypercall.h, and create the corresponding structures and declarations.
| 99 | # implement the corresponding functions in corresponding .c files. That is: |
| 100 | # kernel/arch/x86/hypercall.c, using this file to build the hypercall_gate. |
| 101 | # kernel/core/hypercall.c, in this file, the pok_core_hypercall will deal with the hypercall. |
| 102 | # modify the kernel/include/arch/x86/interrupt.h, add the support of hypercall handler. |
| 103 | # add libpok/arch/x86/hypercall.c, in this file, we implement the pok_do_hypercall, which will invoke the soft interrupt. |
| 104 | # Modify the related Makefiles to ensure those files are built when the BSP is x86-qemu-vmm, and do not affect the normal POK when the BSP is not x86-qemu-vmm.
| 202 | = The interrupt delivery = |
| 203 | |
| 204 | = The POK part = |
| 205 | |
| 206 | |
| 207 | To deliver an interrupt to RTEMS, we first mask the corresponding interrupt in the vCPU when it occurs. Then, when the vCPU is resuming, we go to user space to execute the interrupt handler of RTEMS. See this [http://huaiyusched.github.io/rtems/2014/04/07/the-brief-design-and-outline/ blog].
| 208 | = The interrupt mask function = |
| 209 | |
| 210 | |
| 211 | The interrupt mask function is do_IRQ. Once an interrupt occurs, it will be invoked to check the irq_desc in every vCPU. For example: |
| 212 | {{{ |
| 213 | INTERRUPT_HANDLER(pit_interrupt) |
| 214 | { |
| 215 | //uint8_t vector; |
| 216 | //vector = 32; |
| 217 | (void) frame; |
| 218 | pok_pic_eoi (PIT_IRQ); |
| 219 | do_IRQ(32); |
| 220 | CLOCK_HANDLER; |
| 221 | } |
| 222 | }}} |
| 223 | Let's grab do_IRQ and check it out. |
| 224 | {{{ |
| 225 | #ifdef POK_NEEDS_X86_VMM |
| 226 | |
| 227 | /* |
| 228 | * Deal with the interrupt if the interrupt should be handler by guest |
| 229 | */ |
| 230 | void do_IRQ(uint8_t vector) |
| 231 | { |
| 232 | do_IRQ_guest(vector); |
| 233 | } |
| 234 | |
| 235 | /* |
| 236 | * Decide the interrupt should be send to guest or not |
| 237 | */ |
| 238 | void do_IRQ_guest(uint8_t vector) |
| 239 | { |
| 240 | uint8_t i,j; |
| 241 | struct vcpu *v; |
| 242 | for(i = 0 ; i < POK_CONFIG_NB_PARTITIONS ; i++) |
| 243 | { |
| 244 | v = pok_partitions[i].vcpu; |
| 245 | for (j = 0 ; j< 16; j++) |
| 246 | { |
| 247 | if(v->arch.irqdesc[i].vector == vector) |
| 248 | { |
| 249 | v->arch.irqdesc[i].pending = TRUE; |
| 250 | v->pending = TRUE; |
| 251 | v->arch.irqdesc[i].count++; |
| 252 | } |
| 253 | } |
| 254 | } |
| 255 | } |
| 256 | |
| 257 | #endif /* POK_NEEDS_X86_VMM */ |
| 258 | }}} |
| 259 | The do_IRQ function is supposed to do some common treatment for the interrupt, but I can't see any of that yet. And do_IRQ_guest will, as I said before, check the irq_desc array in each vCPU; if this interrupt is registered there, it marks it pending and increments the counter.
| 260 | For more details please see this [http://huaiyusched.github.io/2014/07/29/the-interrupt-register-function-for-vcpu blog]. |
| 261 | = The upcall function = |
| 262 | |
| 263 | This upcall function's name may confuse some people. When the vCPU resumes, the POK kernel checks the vCPU state; if there is any interrupt pending, it delivers this upcall to RTEMS in user space. After the interrupt handling in RTEMS, control should return to the POK kernel and then go back to the normal program to continue its work. This is the most difficult part.
| 264 | == When will the upcall be invoked? ==
| 265 | |
| 266 | |
| 267 | When the partition resumes, the pending interrupts of this partition should be delivered. So:
| 268 | #. The corresponding partition resumes.
| 269 | #. An interrupt is pending in this vCPU.
| 270 | == What the upcall_irq exactly do? == |
| 271 | |
| 272 | |
| 273 | {{{ |
| 274 | uint32_t upcall_irq(interrupt_frame* frame) |
| 275 | { |
| 276 | struct vcpu *v; |
| 277 | uint8_t i; |
| 278 | uint32_t _eip; |
| 279 | uint32_t user_space_handler; |
| 280 | v = pok_partitions[POK_SCHED_CURRENT_PARTITION].vcpu; |
| 281 | _eip = frame->eip; // if no interrupt happened, return the point of normal program; |
| 282 | user_space_handler = v->arch.handler; |
| 283 | user_space_handler -= pok_partitions[POK_SCHED_CURRENT_PARTITION].base_addr; |
| 284 | if(v->pending != 0) |
| 285 | { |
| 286 | for(i=0;i<15;i++) |
| 287 | { |
| 288 | if(v->arch.irqdesc[i].counter != 0) |
| 289 | { |
| 290 | save_interrupt_vcpu(v,frame); |
| 291 | __upcall_irq(frame, i, (uint32_t) user_space_handler); |
| 292 | v->arch.irqdesc[i].counter --; |
| 293 | return user_space_handler; //if any interrupt occours, return the point of interrupt handler; |
| 294 | } |
| 295 | } |
| 296 | } |
| 297 | return _eip; |
| 298 | } |
| 299 | |
| 300 | }}} |
| 301 | |
| 302 | # Check the pending bit in the vcpu; if it indicates that there is an interrupt pending, then go to step two.
| 303 | # Check the irq_desc. |
| 304 | # If the counter is not equal to zero, save the interrupt context to the interrupt frame and invoke __upcall_irq.
| 304 | == What does __upcall_irq do? ==
| 305 | |
| 306 | |
| 307 | A function named __some_function is usually the core or assembly part of the corresponding some_function. So __upcall_irq is the key to understanding this mechanism, and it's important to understand this function.
| 308 | |
| 309 | {{{ |
| 310 | void __upcall_irq(interrupt_frame* frame,uint8_t vector, uint32_t handler) |
| 311 | { |
| 312 | frame->eax = vector; //put the irq number to eax |
| 313 | frame->eip = handler; //Set the eip as handler |
| 314 | } |
| 315 | }}} |
| 316 | |
| 317 | Then it returns to common interrupt handler entry for Guest OS. |
| 318 | |
| 319 | As you can see, __upcall_irq sets the eax register to the vector of the current interrupt. Also — a very important step — it changes the eip register in the stack frame used by iret.
| 320 | |
| 321 | As we all know, iret restores eip, cs, and eflags from the stack. So we change the eip in the stack, and when the iret executes, the program will continue from where this eip points. What is this eip? The handler registered in the vCPU. The original eip is saved by the save_interrupt_vcpu function.
| 322 | |
| 323 | This is all of the work in POK kernel of interrupt delivery. |
| 324 | = The RTEMS Part = |
| 325 | |
| 326 | |
| 327 | We assume that the upcall function works well, and now the eip is pointing to the handler that RTEMS registered before.
| 328 | = The Handler of RTEMS = |
| 329 | |
| 330 | The handler we register is a common entry point for all RTEMS interrupts. The POK kernel also passes the irq vector in the eax register.
| 331 | So the handler should first read the irq number from that register.
| 332 | Here is an example of handler in Guest OS: |
| 333 | {{{ |
| 334 | void handle_irq() |
| 335 | { |
| 336 | uint32_t irq=0; |
| 337 | do{ |
| 338 | asm( |
| 339 | "add %%eax,%0 \n" \ |
| 340 | :"=m"(irq) \ |
| 341 | : |
| 342 | :"%eax"); |
| 343 | }while(0); |
| 344 | switch(irq) |
| 345 | { |
| 346 | case PIT_IRQ: |
| 347 | tick_counter++; |
| 348 | printf( "Clock gettick: %u \n",tick_counter); |
| 349 | pok_hypercall1( POK_HYPERCALL_IRQ_DO_IRET,0); |
| 350 | break; |
| 351 | |
| 352 | |
| 353 | default: |
| 354 | pok_hypercall1( POK_HYPERCALL_IRQ_DO_IRET,0); |
| 355 | } |
| 356 | } |
| 357 | }}} |
| 358 | |
| 359 | As we can see, we can invoke corresponding interrupt handle of RTEMS in user space. |
| 360 | |
| 361 | However, we can't just use the iret instruction in user space to return from this interrupt handler; we need a specific iret.
| 362 | = Do_IRET in Hypercall = |
| 363 | |
| 364 | After handling the interrupt, we should invoke POK_HYPERCALL_IRQ_DO_IRET. This hypercall will invoke do_iret in the POK kernel, so we use the hypercall to switch into kernel space again.
| 365 | |
| 366 | What the do_iret do? |
| 367 | {{{ |
| 368 | /* |
| 369 | * This do_iret will check the irq_desc,and according to the irq_desc, construct interrupt frame, then iret to execute handler of Guest OS |
| 370 | */ |
| 371 | pok_ret_t do_iret(interrupt_frame *frame) |
| 372 | { |
| 373 | struct vcpu *v; |
| 374 | uint8_t i; |
| 375 | uint32_t user_space_handler; |
| 376 | |
| 377 | v = pok_partitions[POK_SCHED_CURRENT_PARTITION].vcpu; |
| 378 | |
| 379 | user_space_handler = v->arch.handler; |
| 380 | user_space_handler -= pok_partitions[POK_SCHED_CURRENT_PARTITION].base_addr; |
| 381 | if(v->pending != 0) |
| 382 | { |
| 383 | for(i=0;i<15;i++) |
| 384 | { |
| 385 | while(v->arch.irqdesc[i].counter != 0) |
| 386 | { |
| 387 | __upcall_irq(frame, i, (uint32_t) user_space_handler); |
| 388 | v->arch.irqdesc[i].counter--; |
| 389 | return POK_ERRNO_OK; |
| 390 | } |
| 391 | } |
| 392 | v->pending = 0; |
| 393 | } |
| 394 | else if(v->pending == 0) |
| 395 | { |
| 396 | restore_interrupt_vcpu(v, frame); |
| 397 | } |
| 398 | |
| 399 | return POK_ERRNO_OK; |
| 400 | } |
| 401 | }}} |
| 402 | The do_iret function is similar to the upcall_irq function to some extent, so let me introduce it briefly.
| 403 | |
| 404 | The do_iret will check the pending bit and the irq_desc structure. If there are no more interrupts pending in this vCPU, it restores the interrupt context from the vCPU; otherwise, it returns to the handler of the Guest OS again.
| 405 | |
| 406 | For more details, please see this [http://huaiyusched.github.io/2014/08/08/the-current-workflow-of-interrupt-handling blog].
| 406 | = Summary =
| 407 | |
| 408 | |
| 409 | Here is an illustration: |
| 410 | [wiki:File:The_work_flow_of_interrupt_handler_in_vCPU.jpg File:The work flow of interrupt handler in vCPU.jpg] |
| 411 | |
| 412 | As we can see in this illustration, the work flow of the interrupt handler in the vCPU is clear.
| 413 | = To be improved in the future =
| 414 | = In POK = |
| 415 | |
| 416 | # The vCPU in the POK kernel currently works only for interrupt handling. We should improve it to be a part of the POK processor manager: add a CURRENT vCPU macro, a schedule function, etc.
| 417 | # The upcall_irq is not working for now. I promise I will fix it and make it run on RTEMS. Completing the hypervisor this year will benefit the successor next year.
| 417 | = In RTEMS =
| 418 | |
| 419 | # For now, we use the timer interrupt to test interrupt virtualization. But in the future we should not deliver the timer interrupt to RTEMS, because the cost is too heavy. This requires us to build a virtual time system.
| 420 | # When the upcall_irq works, we should use the paravirtualization layer's API to rebuild the syscalls in RTEMS.