diff mbox

[v2,1/1] pci-host: add educational driver

Message ID 1417773160-27247-1-git-send-email-jslaby@suse.cz
State New
Headers show

Commit Message

Jiri Slaby Dec. 5, 2014, 9:52 a.m. UTC
I am using qemu for teaching the Linux kernel at our university. I
wrote a simple PCI device that can answer to writes/reads, generate
interrupts and perform DMA. As I am dragging it locally over 2 years,
I am sending it to you now.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Cc: Paolo Bonzini <pbonzini@redhat.com>
---
 MAINTAINERS             |   5 +
 default-configs/pci.mak |   1 +
 docs/specs/edu.txt      | 105 +++++++++++++++
 hw/misc/Makefile.objs   |   1 +
 hw/misc/edu.c           | 351 ++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 463 insertions(+)
 create mode 100644 docs/specs/edu.txt
 create mode 100644 hw/misc/edu.c

Comments

Paolo Bonzini Dec. 5, 2014, 10:35 a.m. UTC | #1
Hi Jirka,

because this is supposed to be a poster of good QEMU practices, the
review is going to be a bit picky.  Most comments are trivial to apply.

> 
> +0x20 (RO) : status register, bitwise OR
> +	    0x01 -- computing factorial
> +

Perhaps bit 0 can be read-only, while bit 1 is set/clear by the guest
and is "raise interrupt at the end of factorial computation"?

> new file mode 100644
> index 000000000000..7b3d320eeba5
> --- /dev/null
> +++ b/hw/misc/edu.c
> @@ -0,0 +1,351 @@
> +/*
> + * QEMU educational PCI device
> + *
> + * Copyright (c) 2012-2014 Jiri Slaby
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + */
> +
> +#include "hw/pci/pci.h"
> +#include "qemu/timer.h"
> +
> +#define DMA_START	0x40000
> +#define DMA_SIZE	4096
> +#define DMA_IRQ		0x00000100
> +
> +typedef struct {
> +    PCIDevice pdev;
> +    MemoryRegion mmio;
> +
> +    QemuThread thread;
> +    QemuMutex thr_mutex;
> +    QemuCond thr_cond;
> +    bool stopping;
> +
> +    uint32_t addr4;
> +    uint32_t fact;
> +#define EDU_STATUS_COMPUTING	0x1
> +    uint32_t status;
> +
> +    uint32_t irq_status;
> +
> +#define EDU_DMA_RUN		0x1
> +#define EDU_DMA_DIR(cmd)	(((cmd) & 0x2) >> 1)
> +# define EDU_DMA_FROM_PCI	0
> +# define EDU_DMA_TO_PCI		1
> +#define EDU_DMA_IRQ		0x4
> +    struct dma_state {
> +	    dma_addr_t src;
> +	    dma_addr_t dst;
> +	    dma_addr_t cnt;
> +	    dma_addr_t cmd;

QEMU has its own scripts/checkpatch.pl, and this patch wouldn't survive. :)

> +    } dma;
> +    QEMUTimer *dma_timer;

Please use timer_init instead of timer_new, and declare this as
"QEMUTimer dma_timer" instead of a pointer.  Nobody does this, everybody
should do this.

> +    char dma_buf[DMA_SIZE];
> +} EduState;
> +
> +static uint64_t dma_mask = (1UL << 28) - 1;

Please move this inside EduState.

> +static bool within(uint32_t addr, uint32_t start, uint32_t end)
> +{
> +	return start <= addr && addr < end;

4-space indent.

> +}
> +
> +static void edu_check_range(uint32_t addr, uint32_t size1, uint32_t start,
> +		uint32_t size2)
> +{
> +	uint32_t end1 = addr + size1;
> +	uint32_t end2 = start + size2;
> +
> +	if (within(addr, start, end2) &&
> +			end1 > addr && within(end1, start, end2))
> +		return;

More TAB indents, and missing braces around single-statement if-then-elses.

> +
> +	hw_error("EDU: DMA range 0x%.8x-0x%.8x out of bounds (0x%.8x-0x%.8x)!",
> +			addr, end1 - 1, start, end2 - 1);
> +}
> +
> +static dma_addr_t edu_clamp_addr(dma_addr_t addr)
> +{
> +	if (addr & ~dma_mask)
> +		printf("EDU: clamping DMA 0x%.16lx to 0x%.16lx!\n", addr,
> +				addr & dma_mask);
> +	return addr & dma_mask;
> +}
> +
> +static void edu_dma_timer(void *opaque)
> +{
> +	EduState *edu = opaque;

If you use timer_init, you might as well use container_of here.

> +	bool raise_irq = false;
> +
> +	if (!(edu->dma.cmd & EDU_DMA_RUN))
> +		return;
> +
> +	if (EDU_DMA_DIR(edu->dma.cmd) == EDU_DMA_FROM_PCI) {
> +		uint32_t dst = edu->dma.dst;
> +		edu_check_range(dst, edu->dma.cnt, DMA_START, DMA_SIZE);
> +		dst -= DMA_START;
> +		pci_dma_read(&edu->pdev, edu_clamp_addr(edu->dma.src),
> +				edu->dma_buf + dst, edu->dma.cnt);
> +	} else {
> +		uint32_t src = edu->dma.src;
> +		edu_check_range(src, edu->dma.cnt, DMA_START, DMA_SIZE);
> +		src -= DMA_START;
> +		pci_dma_write(&edu->pdev, edu_clamp_addr(edu->dma.dst),
> +				edu->dma_buf + src, edu->dma.cnt);
> +	}
> +
> +	edu->dma.cmd &= ~EDU_DMA_RUN;
> +	if (edu->dma.cmd & EDU_DMA_IRQ)
> +		raise_irq = true;
> +
> +	if (raise_irq) {
> +		edu->irq_status |= DMA_IRQ;
> +		pci_set_irq(&edu->pdev, 1);

Please wrap these two lines in a separate function.

> +	}
> +}
> +
> +static void dma_rw(EduState *edu, bool write, dma_addr_t *val, dma_addr_t *dma,
> +		bool timer)
> +{
> +	if (write && (edu->dma.cmd & EDU_DMA_RUN))
> +		return;
> +
> +	if (write)
> +		*dma = *val;
> +	else
> +		*val = *dma;
> +	if (timer)
> +		timer_mod(edu->dma_timer,
> +				qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 100);

Jackpot! Four statements, four missing braces! :D

> +}
> +
> +static uint64_t edu_mmio_read(void *opaque, hwaddr addr, unsigned size)
> +{
> +    EduState *edu = opaque;
> +    uint64_t val = ~0ULL;
> +
> +    if (size != 4)
> +	return val;
> +
> +    switch (addr) {
> +    case 0x00:
> +	val = 0x010000edu;
> +	break;
> +    case 0x04:
> +	val = edu->addr4;
> +	break;
> +    case 0x08:
> +	qemu_mutex_lock(&edu->thr_mutex);
> +	val = edu->fact;
> +	qemu_mutex_unlock(&edu->thr_mutex);

No need for the mutex.

> +	break;
> +    case 0x20:
> +	smp_rmb(); // against thread
> +	val = edu->status;

Better:

    /* Client is supposed to read status first, then fact.  */
    val = edu->status;
    smp_rmb();

The smp_rmb() has to go _after_ reading val = edu->status.


> +	break;
> +    case 0x24:
> +	val = edu->irq_status;
> +	break;
> +    case 0x80:
> +	dma_rw(edu, false, &val, &edu->dma.src, false);
> +	break;
> +    case 0x88:
> +	dma_rw(edu, false, &val, &edu->dma.dst, false);
> +	break;
> +    case 0x90:
> +	dma_rw(edu, false, &val, &edu->dma.cnt, false);
> +	break;
> +    case 0x98:
> +	dma_rw(edu, false, &val, &edu->dma.cmd, false);
> +	break;
> +    }
> +
> +    return val;
> +}
> +
> +static void edu_mmio_write(void *opaque, hwaddr addr, uint64_t val,
> +		unsigned size)
> +{
> +    EduState *edu = opaque;
> +
> +    if (addr < 0x80 && size != 4)
> +	return;
> +
> +    if (addr >= 0x80 && size != 4 && size != 8)
> +	return;
> +
> +    switch (addr) {
> +    case 0x04:
> +	edu->addr4 = ~val;
> +	break;
> +    case 0x08:
> +	if (edu->status & EDU_STATUS_COMPUTING)
> +		break;
> +	edu->status |= EDU_STATUS_COMPUTING;

atomic_or(&edu->status, EDU_STATUS_COMPUTING);

> +	qemu_mutex_lock(&edu->thr_mutex);
> +	edu->fact = val;

Probably the write should be ignored if edu->status has the computing
bit set, otherwise if you write 4 and 5 in rapid succession you will end
up with 24! in edu->fact.

> +	qemu_cond_signal(&edu->thr_cond);
> +	qemu_mutex_unlock(&edu->thr_mutex);
> +	break;

If you add the above suggestion to use interrupts, you can do:

    case 0x24:
        if (val & EDU_STATUS_FACT_IRQ)
            atomic_or(&edu->status, EDU_STATUS_FACT_IRQ);
        else
            atomic_and(&edu->status, ~EDU_STATUS_FACT_IRQ);
        break;

to leave bit 0 untouched.

> +    case 0x60:
> +	edu->irq_status |= val;
> +	pci_set_irq(&edu->pdev, 1);

Should not set irq if edu->irq_status is zero.

> +	break;
> +    case 0x64:
> +	edu->irq_status &= ~val;
> +	if (!edu->irq_status)
> +		pci_set_irq(&edu->pdev, 0);
> +	break;
> +    case 0x80:
> +	dma_rw(edu, true, &val, &edu->dma.src, false);
> +	break;
> +    case 0x88:
> +	dma_rw(edu, true, &val, &edu->dma.dst, false);
> +	break;
> +    case 0x90:
> +	dma_rw(edu, true, &val, &edu->dma.cnt, false);
> +	break;
> +    case 0x98:
> +	if (!(val & EDU_DMA_RUN))
> +		break;
> +	dma_rw(edu, true, &val, &edu->dma.cmd, true);
> +	break;
> +    }
> +}
> +
> +static const MemoryRegionOps edu_mmio_ops = {
> +    .read = edu_mmio_read,
> +    .write = edu_mmio_write,
> +    .endianness = DEVICE_NATIVE_ENDIAN,
> +};
> +
> +/*
> + * We purposedly use a thread, so that users are forced to wait for the status
> + * register.
> + */
> +static void *edu_fact_thread(void *opaque)
> +{
> +	EduState *edu = opaque;
> +
> +	while (1) {
> +		uint32_t val, ret = 1;
> +
> +		qemu_mutex_lock(&edu->thr_mutex);
> +		qemu_cond_wait(&edu->thr_cond, &edu->thr_mutex);
> +
> +		if (edu->stopping) {
> +			qemu_mutex_unlock(&edu->thr_mutex);
> +			break;
> +		}
> +
> +		val = edu->fact;

Should unlock mutex here...

> +		while (val > 0)
> +			ret *= val--;
> +		edu->fact = ret;
> +		qemu_mutex_unlock(&edu->thr_mutex);

... put smp_wmb() here...


> +		edu->status &= ~EDU_STATUS_COMPUTING;

... and use atomic_and here.

In order to raise an interrupt, for now you can just do pci_set_irq
wrapped with qemu_mutex_lock/unlock_iothread().  Later we can change it
to use a bottom half (QEMUBH), which is QEMU's way to schedule main
thread from a separate thread.

> +	}
> +
> +	return NULL;
> +}
> +
> +static int pci_edu_init(PCIDevice *pdev)
> +{
> +    EduState *edu = DO_UPCAST(EduState, pdev, pdev);
> +    uint8_t *pci_conf = pdev->config;
> +
> +    edu->dma_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, edu_dma_timer, edu);
> +
> +    qemu_mutex_init(&edu->thr_mutex);
> +    qemu_cond_init(&edu->thr_cond);
> +    qemu_thread_create(&edu->thread, "edu", edu_fact_thread,
> +                       edu, QEMU_THREAD_JOINABLE);
> +
> +    pci_set_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID, 0x0);
> +    pci_set_word(pci_conf + PCI_SUBSYSTEM_ID, 0x0);

These two lines are not needed.

> +    pci_config_set_interrupt_pin(pci_conf, 1);
> +
> +    memory_region_init_io(&edu->mmio, OBJECT(edu), &edu_mmio_ops, edu,
> +		    "edu-mmio", 1 << 20);
> +    pci_register_bar(pdev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &edu->mmio);
> +
> +    return 0;
> +}
> +
> +static void pci_edu_uninit(PCIDevice *pdev)
> +{
> +    EduState *edu = DO_UPCAST(EduState, pdev, pdev);
> +
> +    memory_region_destroy(&edu->mmio);
> +
> +    qemu_mutex_lock(&edu->thr_mutex);
> +    edu->stopping = true;
> +    qemu_mutex_unlock(&edu->thr_mutex);
> +    qemu_cond_signal(&edu->thr_cond);
> +    qemu_thread_join(&edu->thread);
> +
> +    qemu_cond_destroy(&edu->thr_cond);
> +    qemu_mutex_destroy(&edu->thr_mutex);
> +
> +    timer_del(edu->dma_timer);
> +    timer_free(edu->dma_timer);
> +}
> +
> +static void edu_obj_uint64(Object *obj, struct Visitor *v, void *opaque,
> +		const char *name, Error **errp)
> +{
> +    uint64_t *val = opaque;
> +
> +    visit_type_uint64(v, val, name, errp);
> +}
> +
> +static void edu_instance_init(Object *obj)
> +{
    EduState *edu = EDU(obj);

> +    object_property_add(obj, "dma_mask", "uint64", edu_obj_uint64,
> +		    edu_obj_uint64, NULL, &dma_mask, NULL);

... and use &edu->dma_mask here.

> +}
> +
> +static void edu_class_init(ObjectClass *class, void *data)
> +{
> +    PCIDeviceClass *k = PCI_DEVICE_CLASS(class);
> +
> +    k->init = pci_edu_init;
> +    k->exit = pci_edu_uninit;
> +    k->vendor_id = PCI_VENDOR_ID_QEMU;
> +    k->device_id = 0x11e8;
> +    k->revision = 0x10;
> +    k->class_id = PCI_CLASS_OTHERS;
> +}
> +
> +static void pci_edu_register_types(void)
> +{
> +    static const TypeInfo edu_info = {
> +	.name          = "edu",
> +	.parent        = TYPE_PCI_DEVICE,
> +	.instance_size = sizeof(EduState),
> +	.instance_init = edu_instance_init,
> +	.class_init    = edu_class_init,
> +    };
> +
> +    type_register_static(&edu_info);
> +}
> +type_init(pci_edu_register_types)
> 

Thanks,

Paolo
Jiri Slaby Dec. 5, 2014, 11:45 a.m. UTC | #2
On 12/05/2014, 11:35 AM, Paolo Bonzini wrote:
> Hi Jirka,
> 
> because this is supposed to be a poster of good QEMU practices, the
> review is going to be a bit picky.  Most comments are trivial to apply.

Hi, OK :).

>> --- /dev/null
>> +++ b/hw/misc/edu.c
>> @@ -0,0 +1,351 @@
...
>> +static void edu_dma_timer(void *opaque)
>> +{
>> +	EduState *edu = opaque;
> 
> If you use timer_init, you might as well use container_of here.

But how? I do not have the timer as a param, right?

>> +static uint64_t edu_mmio_read(void *opaque, hwaddr addr, unsigned size)
>> +{
>> +    EduState *edu = opaque;
>> +    uint64_t val = ~0ULL;
>> +
>> +    if (size != 4)
>> +	return val;
>> +
>> +    switch (addr) {
>> +    case 0x00:
>> +	val = 0x010000edu;
>> +	break;
>> +    case 0x04:
>> +	val = edu->addr4;
>> +	break;
>> +    case 0x08:
>> +	qemu_mutex_lock(&edu->thr_mutex);
>> +	val = edu->fact;
>> +	qemu_mutex_unlock(&edu->thr_mutex);
> 
> No need for the mutex.

But threads, as you wrote are not protected by the big lock. So
shouldn't this be at least atomic_get()?

>> +static void edu_mmio_write(void *opaque, hwaddr addr, uint64_t val,
>> +		unsigned size)
>> +{
>> +    EduState *edu = opaque;
>> +
>> +    if (addr < 0x80 && size != 4)
>> +	return;
>> +
>> +    if (addr >= 0x80 && size != 4 && size != 8)
>> +	return;
>> +
>> +    switch (addr) {
>> +    case 0x04:
>> +	edu->addr4 = ~val;
>> +	break;
>> +    case 0x08:
>> +	if (edu->status & EDU_STATUS_COMPUTING)
>> +		break;
>> +	edu->status |= EDU_STATUS_COMPUTING;
> 
> atomic_or(&edu->status, EDU_STATUS_COMPUTING);
> 
>> +	qemu_mutex_lock(&edu->thr_mutex);
>> +	edu->fact = val;
> 
> Probably the write should be ignored if edu->status has the computing
> bit set, otherwise if you write 4 and 5 in rapid succession you will end
> up with 24! in edu->fact.

But it is, AFAICS above?

>> +	qemu_cond_signal(&edu->thr_cond);
>> +	qemu_mutex_unlock(&edu->thr_mutex);
>> +	break;
> 
> If you add the above suggestion to use interrupts, you can do:
> 
>     case 0x24:
>         if (val & EDU_STATUS_FACT_IRQ)
>             atomic_or(&edu->status, EDU_STATUS_FACT_IRQ);
>         else
>             atomic_and(&edu->status, ~EDU_STATUS_FACT_IRQ);
>         break;
> 
> to leave bit 0 untouched.

Did you mean case 0x20?

>> +    case 0x60:
>> +	edu->irq_status |= val;
>> +	pci_set_irq(&edu->pdev, 1);
> 
> Should not set irq if edu->irq_status is zero.

I don't understand this. 0x60 is supposed to raise interrupts mostly
when edu->irq_status is 0.

thanks,
Paolo Bonzini Dec. 5, 2014, 12:07 p.m. UTC | #3
On 05/12/2014 12:45, Jiri Slaby wrote:
> On 12/05/2014, 11:35 AM, Paolo Bonzini wrote:
>> Hi Jirka,
>>
>> because this is supposed to be a poster of good QEMU practices, the
>> review is going to be a bit picky.  Most comments are trivial to apply.
> 
> Hi, OK :).
> 
>>> --- /dev/null
>>> +++ b/hw/misc/edu.c
>>> @@ -0,0 +1,351 @@
> ...
>>> +static void edu_dma_timer(void *opaque)
>>> +{
>>> +	EduState *edu = opaque;
>>
>> If you use timer_init, you might as well use container_of here.
> 
> But how? I do not have the timer as a param, right?

If you use timer_init, you can choose whether to pass the Timer or
EduState as the opaque.  With timer_new, you have to pass the opaque.
Any of the two can do.

>>> +static uint64_t edu_mmio_read(void *opaque, hwaddr addr, unsigned size)
>>> +{
>>> +    EduState *edu = opaque;
>>> +    uint64_t val = ~0ULL;
>>> +
>>> +    if (size != 4)
>>> +	return val;
>>> +
>>> +    switch (addr) {
>>> +    case 0x00:
>>> +	val = 0x010000edu;
>>> +	break;
>>> +    case 0x04:
>>> +	val = edu->addr4;
>>> +	break;
>>> +    case 0x08:
>>> +	qemu_mutex_lock(&edu->thr_mutex);
>>> +	val = edu->fact;
>>> +	qemu_mutex_unlock(&edu->thr_mutex);
>>
>> No need for the mutex.
> 
> But threads, as you wrote are not protected by the big lock. So
> shouldn't this be at least atomic_get()?

Yes, you can use atomic_read(), same kernel ACCESS_ONCE.

I was a bit confused because you had barriers, and tried to understand
what you meant since you had a smp_rmb() but no matching smp_wmb().  I
think both fact and status writes need to be under the mutex.  As to reads:

- either you put reads under the mutex

- or you put reads outside the mutex, and then you have to use barriers.


>>> +static void edu_mmio_write(void *opaque, hwaddr addr, uint64_t val,
>>> +		unsigned size)
>>> +{
>>> +    EduState *edu = opaque;
>>> +
>>> +    if (addr < 0x80 && size != 4)
>>> +	return;
>>> +
>>> +    if (addr >= 0x80 && size != 4 && size != 8)
>>> +	return;
>>> +
>>> +    switch (addr) {
>>> +    case 0x04:
>>> +	edu->addr4 = ~val;
>>> +	break;
>>> +    case 0x08:
>>> +	if (edu->status & EDU_STATUS_COMPUTING)
>>> +		break;
>>> +	edu->status |= EDU_STATUS_COMPUTING;
>>
>> atomic_or(&edu->status, EDU_STATUS_COMPUTING);
>>
>>> +	qemu_mutex_lock(&edu->thr_mutex);
>>> +	edu->fact = val;
>>
>> Probably the write should be ignored if edu->status has the computing
>> bit set, otherwise if you write 4 and 5 in rapid succession you will end
>> up with 24! in edu->fact.
> 
> But it is, AFAICS above?

Oops.  I was only looking inside the critical section.  Does it have to
be checked inside the mutex, in fact?

Also, the qemu_cond_wait has to be protected with

    while ((edu->status & EDU_STATUS_COMPUTING) == 0 && !edu->stopping)

to avoid problems with spurious wakeups.

>>> +	qemu_cond_signal(&edu->thr_cond);
>>> +	qemu_mutex_unlock(&edu->thr_mutex);
>>> +	break;
>>
>> If you add the above suggestion to use interrupts, you can do:
>>
>>     case 0x24:
>>         if (val & EDU_STATUS_FACT_IRQ)
>>             atomic_or(&edu->status, EDU_STATUS_FACT_IRQ);
>>         else
>>             atomic_and(&edu->status, ~EDU_STATUS_FACT_IRQ);
>>         break;
>>
>> to leave bit 0 untouched.
> 
> Did you mean case 0x20?

Yes, sorry.

>>> +    case 0x60:
>>> +	edu->irq_status |= val;
>>> +	pci_set_irq(&edu->pdev, 1);
>>
>> Should not set irq if edu->irq_status is zero.
> 
> I don't understand this. 0x60 is supposed to raise interrupts mostly
> when edu->irq_status is 0.

But if you write zero, i.e. edu->irq_status is zero after the OR, it
shouldn't raise a spurious interrupt, should it?

Paolo
diff mbox

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index bcb69e80d2dd..9b69289aa8e3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -593,6 +593,11 @@  F: hw/net/opencores_eth.c
 
 Devices
 -------
+EDU
+M: Jiri Slaby <jslaby@suse.cz>
+S: Maintained
+F: hw/pci-host/edu.c
+
 IDE
 M: Kevin Wolf <kwolf@redhat.com>
 M: Stefan Hajnoczi <stefanha@redhat.com>
diff --git a/default-configs/pci.mak b/default-configs/pci.mak
index 91b1e92da53f..29130aba61d6 100644
--- a/default-configs/pci.mak
+++ b/default-configs/pci.mak
@@ -30,3 +30,4 @@  CONFIG_IPACK=y
 CONFIG_WDT_IB6300ESB=y
 CONFIG_PCI_TESTDEV=y
 CONFIG_NVME_PCI=y
+CONFIG_EDU=y
diff --git a/docs/specs/edu.txt b/docs/specs/edu.txt
new file mode 100644
index 000000000000..94409bb58297
--- /dev/null
+++ b/docs/specs/edu.txt
@@ -0,0 +1,105 @@ 
+
+EDU device
+==========
+
+This is an educational device for writing (kernel) drivers. Its original
+intention was to support the Linux kernel lectures taught at the Masaryk
+University. Students are given this virtual device and are expected to write a
+driver with I/Os, IRQs, DMAs and such.
+
+The devices behaves very similar to the PCI bridge present in the COMBO6 cards
+developed under the Liberouter wings. Both PCI device ID and PCI space is
+inherited from that device.
+
+Command line switches:
+    -device edu[,dma_mask=mask]
+
+    dma_mask makes the virtual device work with DMA addresses with the given
+    mask. For educational purposes, the device supports only 28 bits (256 MiB)
+    by default. Students shall set dma_mask for the device in the OS driver
+    properly.
+
+PCI specs
+---------
+
+PCI ID: 1234:11e8
+
+PCI Region 0:
+   I/O memory, 1 MB in size. Users are supposed to communicate with the card
+   through this memory.
+
+MMIO area spec
+--------------
+
+Only size == 4 accesses are allowed for addresses < 0x80. size == 4 or
+size == 8 for the rest.
+
+0x00 (RO) : identification (0xRRrr00edu)
+	    RR -- major version
+	    rr -- minor version
+
+0x04 (RW) : card liveness check
+	    It is a simple value inversion (~ C operator).
+
+0x08 (RW) : factorial computation
+	    The stored value is taken and factorial of it is put back here.
+	    This happens only after factorial bit in the status register (0x20
+	    below) is cleared.
+
+0x20 (RO) : status register, bitwise OR
+	    0x01 -- computing factorial
+
+0x24 (RO) : interrupt status register
+	    It contains values which raised the interrupt (see interrupt raise
+	    register below).
+
+0x60 (WO) : interrupt raise register
+	    Raise an interrupt. The value will be put to the interrupt status
+	    register (using bitwise OR).
+
+0x64 (WO) : interrupt acknowledge register
+	    Clear an interrupt. The value will be cleared from the interrupt
+	    status register. This needs to be done from the ISR to stop
+	    generating interrupts.
+
+0x80 (RW) : DMA source address
+	    Where to perform the DMA from.
+
+0x88 (RW) : DMA destination address
+	    Where to perform the DMA to.
+
+0x90 (RW) : DMA transfer count
+	    The size of the area to perform the DMA on.
+
+0x98 (RW) : DMA command register, bitwise OR
+	    0x01 -- start transfer
+	    0x02 -- direction (0: from RAM to EDU, 1: from EDU to RAM)
+	    0x04 -- raise interrupt 0x100 after finishing the DMA
+
+IRQ controller
+--------------
+An IRQ is generated when written to the interrupt raise register. The value
+appears in interrupt status register when the interrupt is raised and has to
+be written to the interrupt acknowledge register to lower it.
+
+DMA controller
+--------------
+One has to specify, source, destination, size, and start the transfer. One
+4096 bytes long buffer at offset 0x40000 is available in the EDU device. I.e.
+one can perform DMA to/from this space when programmed properly.
+
+Example of transferring a 100 byte block to and from the buffer using a given
+PCI address 'addr':
+addr     -> DMA source address
+0x40000  -> DMA destination address
+100      -> DMA transfer count
+1        -> DMA command register
+while (DMA command register & 1)
+	;
+
+0x40000  -> DMA source address
+addr+100 -> DMA destination address
+100      -> DMA transfer count
+3        -> DMA command register
+while (DMA command register & 1)
+	;
diff --git a/hw/misc/Makefile.objs b/hw/misc/Makefile.objs
index 979e532fdf85..1fe74c6eb3fa 100644
--- a/hw/misc/Makefile.objs
+++ b/hw/misc/Makefile.objs
@@ -41,3 +41,4 @@  obj-$(CONFIG_SLAVIO) += slavio_misc.o
 obj-$(CONFIG_ZYNQ) += zynq_slcr.o
 
 obj-$(CONFIG_PVPANIC) += pvpanic.o
+obj-$(CONFIG_EDU) += edu.o
diff --git a/hw/misc/edu.c b/hw/misc/edu.c
new file mode 100644
index 000000000000..7b3d320eeba5
--- /dev/null
+++ b/hw/misc/edu.c
@@ -0,0 +1,351 @@ 
+/*
+ * QEMU educational PCI device
+ *
+ * Copyright (c) 2012-2014 Jiri Slaby
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "hw/pci/pci.h"
+#include "qemu/timer.h"
+
+#define DMA_START	0x40000
+#define DMA_SIZE	4096
+#define DMA_IRQ		0x00000100
+
+typedef struct {
+    PCIDevice pdev;
+    MemoryRegion mmio;
+
+    QemuThread thread;
+    QemuMutex thr_mutex;
+    QemuCond thr_cond;
+    bool stopping;
+
+    uint32_t addr4;
+    uint32_t fact;
+#define EDU_STATUS_COMPUTING	0x1
+    uint32_t status;
+
+    uint32_t irq_status;
+
+#define EDU_DMA_RUN		0x1
+#define EDU_DMA_DIR(cmd)	(((cmd) & 0x2) >> 1)
+# define EDU_DMA_FROM_PCI	0
+# define EDU_DMA_TO_PCI		1
+#define EDU_DMA_IRQ		0x4
+    struct dma_state {
+	    dma_addr_t src;
+	    dma_addr_t dst;
+	    dma_addr_t cnt;
+	    dma_addr_t cmd;
+    } dma;
+    QEMUTimer *dma_timer;
+    char dma_buf[DMA_SIZE];
+} EduState;
+
+static uint64_t dma_mask = (1UL << 28) - 1;
+
+static bool within(uint32_t addr, uint32_t start, uint32_t end)
+{
+	return start <= addr && addr < end;
+}
+
+static void edu_check_range(uint32_t addr, uint32_t size1, uint32_t start,
+		uint32_t size2)
+{
+	uint32_t end1 = addr + size1;
+	uint32_t end2 = start + size2;
+
+	if (within(addr, start, end2) &&
+			end1 > addr && within(end1, start, end2))
+		return;
+
+	hw_error("EDU: DMA range 0x%.8x-0x%.8x out of bounds (0x%.8x-0x%.8x)!",
+			addr, end1 - 1, start, end2 - 1);
+}
+
+static dma_addr_t edu_clamp_addr(dma_addr_t addr)
+{
+	if (addr & ~dma_mask)
+		printf("EDU: clamping DMA 0x%.16lx to 0x%.16lx!\n", addr,
+				addr & dma_mask);
+	return addr & dma_mask;
+}
+
+static void edu_dma_timer(void *opaque)
+{
+	EduState *edu = opaque;
+	bool raise_irq = false;
+
+	if (!(edu->dma.cmd & EDU_DMA_RUN))
+		return;
+
+	if (EDU_DMA_DIR(edu->dma.cmd) == EDU_DMA_FROM_PCI) {
+		uint32_t dst = edu->dma.dst;
+		edu_check_range(dst, edu->dma.cnt, DMA_START, DMA_SIZE);
+		dst -= DMA_START;
+		pci_dma_read(&edu->pdev, edu_clamp_addr(edu->dma.src),
+				edu->dma_buf + dst, edu->dma.cnt);
+	} else {
+		uint32_t src = edu->dma.src;
+		edu_check_range(src, edu->dma.cnt, DMA_START, DMA_SIZE);
+		src -= DMA_START;
+		pci_dma_write(&edu->pdev, edu_clamp_addr(edu->dma.dst),
+				edu->dma_buf + src, edu->dma.cnt);
+	}
+
+	edu->dma.cmd &= ~EDU_DMA_RUN;
+	if (edu->dma.cmd & EDU_DMA_IRQ)
+		raise_irq = true;
+
+	if (raise_irq) {
+		edu->irq_status |= DMA_IRQ;
+		pci_set_irq(&edu->pdev, 1);
+	}
+}
+
+static void dma_rw(EduState *edu, bool write, dma_addr_t *val, dma_addr_t *dma,
+		bool timer)
+{
+	if (write && (edu->dma.cmd & EDU_DMA_RUN))
+		return;
+
+	if (write)
+		*dma = *val;
+	else
+		*val = *dma;
+	if (timer)
+		timer_mod(edu->dma_timer,
+				qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + 100);
+}
+
+static uint64_t edu_mmio_read(void *opaque, hwaddr addr, unsigned size)
+{
+    EduState *edu = opaque;
+    uint64_t val = ~0ULL;
+
+    if (size != 4)
+	return val;
+
+    switch (addr) {
+    case 0x00:
+	val = 0x010000edu;
+	break;
+    case 0x04:
+	val = edu->addr4;
+	break;
+    case 0x08:
+	qemu_mutex_lock(&edu->thr_mutex);
+	val = edu->fact;
+	qemu_mutex_unlock(&edu->thr_mutex);
+	break;
+    case 0x20:
+	smp_rmb(); // against thread
+	val = edu->status;
+	break;
+    case 0x24:
+	val = edu->irq_status;
+	break;
+    case 0x80:
+	dma_rw(edu, false, &val, &edu->dma.src, false);
+	break;
+    case 0x88:
+	dma_rw(edu, false, &val, &edu->dma.dst, false);
+	break;
+    case 0x90:
+	dma_rw(edu, false, &val, &edu->dma.cnt, false);
+	break;
+    case 0x98:
+	dma_rw(edu, false, &val, &edu->dma.cmd, false);
+	break;
+    }
+
+    return val;
+}
+
+static void edu_mmio_write(void *opaque, hwaddr addr, uint64_t val,
+		unsigned size)
+{
+    EduState *edu = opaque;
+
+    if (addr < 0x80 && size != 4)
+	return;
+
+    if (addr >= 0x80 && size != 4 && size != 8)
+	return;
+
+    switch (addr) {
+    case 0x04:
+	edu->addr4 = ~val;
+	break;
+    case 0x08:
+	if (edu->status & EDU_STATUS_COMPUTING)
+		break;
+	edu->status |= EDU_STATUS_COMPUTING;
+	qemu_mutex_lock(&edu->thr_mutex);
+	edu->fact = val;
+	qemu_cond_signal(&edu->thr_cond);
+	qemu_mutex_unlock(&edu->thr_mutex);
+	break;
+    case 0x60:
+	edu->irq_status |= val;
+	pci_set_irq(&edu->pdev, 1);
+	break;
+    case 0x64:
+	edu->irq_status &= ~val;
+	if (!edu->irq_status)
+		pci_set_irq(&edu->pdev, 0);
+	break;
+    case 0x80:
+	dma_rw(edu, true, &val, &edu->dma.src, false);
+	break;
+    case 0x88:
+	dma_rw(edu, true, &val, &edu->dma.dst, false);
+	break;
+    case 0x90:
+	dma_rw(edu, true, &val, &edu->dma.cnt, false);
+	break;
+    case 0x98:
+	if (!(val & EDU_DMA_RUN))
+		break;
+	dma_rw(edu, true, &val, &edu->dma.cmd, true);
+	break;
+    }
+}
+
+static const MemoryRegionOps edu_mmio_ops = {
+    .read = edu_mmio_read,
+    .write = edu_mmio_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+/*
+ * We purposedly use a thread, so that users are forced to wait for the status
+ * register.
+ */
+static void *edu_fact_thread(void *opaque)
+{
+	EduState *edu = opaque;
+
+	while (1) {
+		uint32_t val, ret = 1;
+
+		qemu_mutex_lock(&edu->thr_mutex);
+		qemu_cond_wait(&edu->thr_cond, &edu->thr_mutex);
+
+		if (edu->stopping) {
+			qemu_mutex_unlock(&edu->thr_mutex);
+			break;
+		}
+
+		val = edu->fact;
+		while (val > 0)
+			ret *= val--;
+
+		edu->fact = ret;
+		qemu_mutex_unlock(&edu->thr_mutex);
+		edu->status &= ~EDU_STATUS_COMPUTING;
+	}
+
+	return NULL;
+}
+
+static int pci_edu_init(PCIDevice *pdev)
+{
+    EduState *edu = DO_UPCAST(EduState, pdev, pdev);
+    uint8_t *pci_conf = pdev->config;
+
+    edu->dma_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, edu_dma_timer, edu);
+
+    qemu_mutex_init(&edu->thr_mutex);
+    qemu_cond_init(&edu->thr_cond);
+    qemu_thread_create(&edu->thread, "edu", edu_fact_thread,
+                       edu, QEMU_THREAD_JOINABLE);
+
+    pci_set_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID, 0x0);
+    pci_set_word(pci_conf + PCI_SUBSYSTEM_ID, 0x0);
+
+    pci_config_set_interrupt_pin(pci_conf, 1);
+
+    memory_region_init_io(&edu->mmio, OBJECT(edu), &edu_mmio_ops, edu,
+		    "edu-mmio", 1 << 20);
+    pci_register_bar(pdev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &edu->mmio);
+
+    return 0;
+}
+
+static void pci_edu_uninit(PCIDevice *pdev)
+{
+    EduState *edu = DO_UPCAST(EduState, pdev, pdev);
+
+    memory_region_destroy(&edu->mmio);
+
+    qemu_mutex_lock(&edu->thr_mutex);
+    edu->stopping = true;
+    qemu_mutex_unlock(&edu->thr_mutex);
+    qemu_cond_signal(&edu->thr_cond);
+    qemu_thread_join(&edu->thread);
+
+    qemu_cond_destroy(&edu->thr_cond);
+    qemu_mutex_destroy(&edu->thr_mutex);
+
+    timer_del(edu->dma_timer);
+    timer_free(edu->dma_timer);
+}
+
+static void edu_obj_uint64(Object *obj, struct Visitor *v, void *opaque,
+		const char *name, Error **errp)
+{
+    uint64_t *val = opaque;
+
+    visit_type_uint64(v, val, name, errp);
+}
+
+static void edu_instance_init(Object *obj)
+{
+    object_property_add(obj, "dma_mask", "uint64", edu_obj_uint64,
+		    edu_obj_uint64, NULL, &dma_mask, NULL);
+}
+
+static void edu_class_init(ObjectClass *class, void *data)
+{
+    PCIDeviceClass *k = PCI_DEVICE_CLASS(class);
+
+    k->init = pci_edu_init;
+    k->exit = pci_edu_uninit;
+    k->vendor_id = PCI_VENDOR_ID_QEMU;
+    k->device_id = 0x11e8;
+    k->revision = 0x10;
+    k->class_id = PCI_CLASS_OTHERS;
+}
+
+static void pci_edu_register_types(void)
+{
+    static const TypeInfo edu_info = {
+	.name          = "edu",
+	.parent        = TYPE_PCI_DEVICE,
+	.instance_size = sizeof(EduState),
+	.instance_init = edu_instance_init,
+	.class_init    = edu_class_init,
+    };
+
+    type_register_static(&edu_info);
+}
+type_init(pci_edu_register_types)